Search in sources :

Example 26 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfBiomart method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
    HttpGet httpGet = null;
    final CharSplitter tab = CharSplitter.TAB;
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        final VCFHeader header = iter.getHeader();
        StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.rmAttribute(this.TAG);
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            // System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
            // escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
            httpGet = new HttpGet(builder.build());
            final CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
            int responseCode = httpResponse.getStatusLine().getStatusCode();
            if (responseCode != 200) {
                throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
            }
            InputStream response = httpResponse.getEntity().getContent();
            if (this.teeResponse) {
                response = new TeeInputStream(response, stderr(), false);
            }
            final BufferedReader br = new BufferedReader(new InputStreamReader(response));
            final Set<String> infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
                final StringBuilder sb = new StringBuilder();
                for (int i = 0; i < this.attributes.size(); i++) {
                    if (i > 0)
                        sb.append("|");
                    if (this.printLabels)
                        sb.append(escapeInfo(this.attributes.get(i))).append("|");
                    sb.append(i < T.length ? escapeInfo(T[i]) : "");
                }
                return sb.toString();
            }).collect(Collectors.toCollection(LinkedHashSet::new));
            CloserUtil.close(br);
            CloserUtil.close(response);
            CloserUtil.close(httpResponse);
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Transformer(javax.xml.transform.Transformer) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Attr(org.w3c.dom.Attr) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Document(org.w3c.dom.Document) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Node(org.w3c.dom.Node) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) URIBuilder(org.apache.http.client.utils.URIBuilder) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Element(org.w3c.dom.Element) HttpGet(org.apache.http.client.methods.HttpGet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) BufferedReader(java.io.BufferedReader) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HttpClients(org.apache.http.impl.client.HttpClients) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) StringWriter(java.io.StringWriter) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) VCFHeader(htsjdk.variant.vcf.VCFHeader) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) TransformerFactory(javax.xml.transform.TransformerFactory) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) StreamResult(javax.xml.transform.stream.StreamResult) InputStreamReader(java.io.InputStreamReader) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) InputStream(java.io.InputStream) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) URIBuilder(org.apache.http.client.utils.URIBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 27 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfFilterNotInPedigree method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
    final int IGNORE_SINGLETON = -1;
    final Predicate<Genotype> acceptFilteredGenotype = G -> G != null && (this.ignoreFilteredGT == false || !G.isFiltered());
    final Pedigree pedigree;
    final VCFHeader header = iterin.getHeader();
    try {
        pedigree = new PedigreeParser().parse(this.pedigreeFile);
    } catch (final IOException err) {
        LOG.error("Cannot read pedigree in file: " + this.pedigreeFile, err);
        return -1;
    }
    if (pedigree.isEmpty()) {
        throw new JvarkitException.PedigreeError("No pedigree found in header. use VcfInjectPedigree to add it");
    }
    pedigree.checkUniqIds();
    final Set<String> samplesNames = new HashSet<>(header.getSampleNamesInOrder());
    final Set<Sample> individuals = new HashSet<>(pedigree.getSamples());
    final Iterator<Sample> iter = individuals.iterator();
    while (iter.hasNext()) {
        final Sample person = iter.next();
        if (!(samplesNames.contains(person.getId()))) {
            LOG.warn("Ignoring " + person + " because not in VCF header.");
            iter.remove();
        }
    }
    final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(this.filterName, "Will be set for variant where the only genotypes non-homref are NOT in the pedigree ");
    final VCFFilterHeaderLine singletonFilter = new VCFFilterHeaderLine(this.singletonfilterName, "The ALT allele is found in less or equals than " + this.singleton + " individuals in the cases/controls");
    final VCFHeader h2 = new VCFHeader(header);
    JVarkitVersion.getInstance().addMetaData(this, h2);
    if (this.singleton != IGNORE_SINGLETON) {
        h2.addMetaDataLine(singletonFilter);
    }
    out.writeHeader(h2);
    while (iterin.hasNext()) {
        final VariantContext ctx = iterin.next();
        final boolean in_pedigree = individuals.stream().map(P -> ctx.getGenotype(P.getId())).filter(acceptFilteredGenotype).anyMatch(g -> (!(g == null || !g.isCalled() || !g.isAvailable() || g.isNoCall() || g.isHomRef())));
        if (!in_pedigree) {
            if (this.dicardVariant)
                continue;
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.filter(filter.getID());
            out.add(vcb.make());
        } else {
            boolean is_singleton;
            if (this.singleton != IGNORE_SINGLETON) {
                is_singleton = true;
                for (final Allele alt : ctx.getAlternateAlleles()) {
                    if (individuals.stream().map(P -> ctx.getGenotype(P.getId())).filter(acceptFilteredGenotype).filter(g -> g != null && g.isCalled() && g.getAlleles().contains(alt)).count() > this.singleton) {
                        is_singleton = false;
                        break;
                    }
                }
            } else {
                is_singleton = false;
            }
            if (is_singleton) {
                if (this.dicardVariant)
                    continue;
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.filter(singletonFilter.getID());
                out.add(vcb.make());
            } else {
                out.add(ctx);
            }
        }
    }
    return 0;
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) Allele(htsjdk.variant.variantcontext.Allele) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Iterator(java.util.Iterator) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) Predicate(java.util.function.Predicate) VCFHeader(htsjdk.variant.vcf.VCFHeader) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) HashSet(java.util.HashSet) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) Path(java.nio.file.Path) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 28 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfDoest method run.

private void run(final LineIterator lr, final PrintWriter pw) throws IOException {
    SortingCollection<TranscriptInfo> sorting = null;
    CloseableIterator<TranscriptInfo> iter2 = null;
    try {
        while (lr.hasNext()) {
            VCFIterator in = VCFUtils.createVCFIteratorFromLineIterator(lr, true);
            final VCFHeader header = in.getHeader();
            final Pedigree pedigree = Pedigree.newParser().parse(header);
            if (pedigree.isEmpty()) {
                throw new IOException("No pedigree found in header VCF header. use VcfInjectPedigree to add it");
            }
            final SortedSet<Pedigree.Person> individuals = new TreeSet<>();
            for (final Pedigree.Person individual : pedigree.getPersons()) {
                if (individual.isAffected() || individual.isUnaffected()) {
                    individuals.add(individual);
                }
            }
            boolean first = true;
            pw.println("# samples ( 0: unaffected 1:affected)");
            pw.print("population <- data.frame(family=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getFamily().getId() + "\"");
                first = false;
            }
            pw.print("),name=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getId() + "\"");
                first = false;
            }
            pw.print("),status=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print(person.isUnaffected() ? 0 : 1);
                first = false;
            }
            pw.println("))");
            sorting = SortingCollection.newInstance(TranscriptInfo.class, new TranscriptInfoCodec(), new TranscriptInfoCmp(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            sorting.setDestructiveIteration(true);
            final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
            /**
             * loop over variants
             */
            while (in.hasNext() && !pw.checkError()) {
                final VariantContext ctx = progess.watch(in.next());
                if (ctx.isFiltered())
                    continue;
                if (ctx.getAlternateAlleles().isEmpty())
                    continue;
                final Allele altAllele = ctx.getAltAlleleWithHighestAlleleCount();
                final MafCalculator mafCalculator = new MafCalculator(altAllele, ctx.getContig());
                boolean genotyped = false;
                for (final Pedigree.Person p : pedigree.getPersons()) {
                    if (!(p.isAffected() || p.isUnaffected()))
                        continue;
                    final Genotype g = ctx.getGenotype(p.getId());
                    if (g == null)
                        throw new IOException("Strange I cannot find individual " + p + " in the pedigree. Aborting.");
                    if (g.isCalled()) {
                        mafCalculator.add(g, p.isMale());
                    }
                    if (g.isHet() || g.isHomVar()) {
                        if (!g.getAlleles().contains(altAllele))
                            continue;
                        genotyped = true;
                        break;
                    }
                }
                if (!genotyped)
                    continue;
                final Interval interval = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd());
                final List<KnownGene> genes = this.overlap(interval);
                if (genes.isEmpty())
                    continue;
                for (final KnownGene kg : genes) {
                    final TranscriptInfo trInfo = new TranscriptInfo();
                    trInfo.contig = kg.getContig();
                    trInfo.txStart = kg.getTxStart();
                    trInfo.txEnd = kg.getTxEnd();
                    trInfo.transcriptName = kg.getName();
                    trInfo.strand = (byte) (kg.isPositiveStrand() ? '+' : '-');
                    trInfo.exonCount = kg.getExonCount();
                    trInfo.transcriptLength = kg.getTranscriptLength();
                    trInfo.ctxStart = ctx.getStart();
                    trInfo.ref = ctx.getReference();
                    trInfo.alt = altAllele;
                    trInfo.maf = mafCalculator.getMaf();
                    trInfo.genotypes = new byte[individuals.size()];
                    int idx = 0;
                    for (final Pedigree.Person individual : individuals) {
                        final Genotype genotype = ctx.getGenotype(individual.getId());
                        final byte b;
                        if (genotype.isHomRef()) {
                            b = 0;
                        } else if (genotype.isHomVar() && genotype.getAlleles().contains(altAllele)) {
                            b = 2;
                        } else if (genotype.isHet() && genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            b = 1;
                        } else /* we treat 0/2 has hom-ref */
                        if (genotype.isHet() && !genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else /* we treat 2/2 has hom-ref */
                        if (genotype.isHomVar() && !genotype.getAlleles().contains(altAllele)) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else {
                            b = -9;
                        }
                        trInfo.genotypes[idx] = b;
                        ++idx;
                    }
                    KnownGene archetype = kg;
                    /* find gene archetype = longest overlapping */
                    for (final KnownGene kg2 : genes) {
                        if (kg2 == kg)
                            continue;
                        if (archetype.getStrand().equals(kg2.getStrand()) && archetype.getTranscriptLength() < kg2.getTranscriptLength()) {
                            archetype = kg2;
                        }
                    }
                    trInfo.archetypeName = archetype.getName();
                    trInfo.archetypeLength = archetype.getTranscriptLength();
                    boolean ctxWasFoundInExon = false;
                    final int ctxPos0 = ctx.getStart() - 1;
                    int indexInTranscript0 = 0;
                    for (final KnownGene.Exon exon : kg.getExons()) {
                        // variant in exon ?
                        if (!(exon.getStart() > (ctx.getEnd() - 1) || (ctx.getStart() - 1) >= exon.getEnd())) {
                            ctxWasFoundInExon = true;
                            indexInTranscript0 += (ctxPos0 - exon.getStart());
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon.getName();
                            sorting.add(trInfo);
                            break;
                        } else {
                            indexInTranscript0 += (exon.getEnd() - exon.getStart());
                        }
                    }
                    if (ctxWasFoundInExon) {
                        continue;
                    }
                    indexInTranscript0 = 0;
                    // search closest intron/exon junction
                    for (int ex = 0; ex + 1 < kg.getExonCount(); ++ex) {
                        final KnownGene.Exon exon1 = kg.getExon(ex);
                        indexInTranscript0 += (exon1.getEnd() - exon1.getStart());
                        final KnownGene.Exon exon2 = kg.getExon(ex + 1);
                        if (exon1.getEnd() <= ctxPos0 && ctxPos0 < exon2.getStart()) {
                            final int dist_to_exon1 = ctxPos0 - exon1.getEnd();
                            final int dist_to_exon2 = exon2.getStart() - ctxPos0;
                            if (dist_to_exon2 < dist_to_exon1) {
                                indexInTranscript0++;
                            }
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon1.getNextIntron().getName();
                            sorting.add(trInfo);
                            break;
                        }
                    }
                }
            // end loop over genes
            }
            // end while loop over variants
            progess.finish();
            sorting.doneAdding();
            LOG.info("done adding");
            iter2 = sorting.iterator();
            final EqualRangeIterator<TranscriptInfo> eqiter = new EqualRangeIterator<TranscriptInfo>(iter2, new Comparator<TranscriptInfo>() {

                @Override
                public int compare(final TranscriptInfo o1, final TranscriptInfo o2) {
                    int i = o1.contig.compareTo(o2.contig);
                    if (i != 0)
                        return i;
                    i = o1.transcriptName.compareTo(o2.transcriptName);
                    return i;
                }
            });
            while (eqiter.hasNext()) {
                final List<TranscriptInfo> list = eqiter.next();
                final TranscriptInfo front = list.get(0);
                pw.println("# BEGIN TRANSCRIPT " + front.transcriptName + " ##########################################");
                pw.println("transcript.chrom <- \"" + front.contig + "\"");
                pw.println("transcript.txStart0 <- " + front.txStart + "");
                pw.println("transcript.txEnd0 <- " + front.txEnd + "");
                pw.println("transcript.name <- \"" + front.transcriptName + "\"");
                pw.println("transcript.strand <- \"" + ((char) front.strand) + "\"");
                pw.println("transcript.length <- " + front.transcriptLength + "");
                pw.println("transcript.exonCount <- " + front.exonCount + "");
                pw.println("archetype.name <- \"" + front.archetypeName + "\"");
                pw.println("archetype.length <- " + front.archetypeLength + "");
                pw.print("variants <- data.frame(chrom=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.contig + "\"");
                    first = false;
                }
                pw.print("),chromStart=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart);
                    first = false;
                }
                pw.print("),chromEnd=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart + v.ref.length() - 1);
                    first = false;
                }
                pw.print("),refAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.ref.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),altAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.alt.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),positionInTranscript1=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.indexInTranscript0 + 1);
                    first = false;
                }
                pw.print("),maf=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.maf);
                    first = false;
                }
                pw.print("),overlapName=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.overlapName + "\"");
                    first = false;
                }
                pw.println("))");
                pw.println("# genotypes as a list. Should be a multiple of length(samples).");
                pw.println("# 0 is homref (0/0), 1 is het (0/1), 2 is homvar (1/1)");
                pw.println("# if the variant contains another ALT allele: (0/2) and (2/2) are considered 0 (homref)");
                pw.print("genotypes <- c(");
                first = true;
                for (final TranscriptInfo tr : list) {
                    for (byte g : tr.genotypes) {
                        if (!first)
                            pw.print(",");
                        first = false;
                        pw.print((int) g);
                    }
                }
                pw.println(")");
                pw.println("stopifnot(NROW(variants) * NROW(population) == length(genotypes) )");
                if (this.userDefinedFunName == null || this.userDefinedFunName.trim().isEmpty()) {
                    pw.println("## WARNING not user-defined R function was defined");
                } else {
                    pw.println("# consumme data with user-defined R function ");
                    pw.println(this.userDefinedFunName + "()");
                }
                pw.println("# END TRANSCRIPT " + front.transcriptName + " ##########################################");
            }
            // end while eqiter
            eqiter.close();
            iter2.close();
            iter2 = null;
            sorting.cleanup();
            sorting = null;
        }
    } finally {
        CloserUtil.close(iter2);
        if (sorting != null)
            sorting.cleanup();
    }
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) Interval(htsjdk.samtools.util.Interval)

Example 29 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfMoveFiltersToInfo method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    final VCFHeader header2 = new VCFHeader(header);
    final VCFInfoHeaderLine infoHeaderLine = new VCFInfoHeaderLine(this.infoName.trim(), VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Variant was previously FILTERed with the given values.");
    final Set<String> limitToThoseFilters = onlyThoseFiltersTagStr.stream().flatMap(S -> Arrays.asList(S.split("[, ]")).stream()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet());
    if (header.getInfoHeaderLine(infoHeaderLine.getID()) != null) {
        throw new JvarkitException.UserError("INFO[" + infoHeaderLine.getID() + "] already exists in input VCF.");
    }
    header2.addMetaDataLine(infoHeaderLine);
    JVarkitVersion.getInstance().addMetaData(this, header2);
    out.writeHeader(header2);
    while (in.hasNext()) {
        final VariantContext ctx = in.next();
        if (ctx.isNotFiltered()) {
            out.add(ctx);
        } else {
            final Set<String> INFOfilters = new HashSet<>();
            final Set<String> FILTERfilters = new HashSet<>();
            for (final String filter : ctx.getFilters()) {
                if (filter.equals(VCFConstants.UNFILTERED) || filter.equals(VCFConstants.PASSES_FILTERS_v3) || filter.equals(VCFConstants.PASSES_FILTERS_v4)) {
                    continue;
                }
                if (!limitToThoseFilters.isEmpty() && !limitToThoseFilters.contains(filter)) {
                    FILTERfilters.add(filter);
                } else {
                    INFOfilters.add(filter);
                }
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            if (FILTERfilters.isEmpty()) {
                vcb.passFilters();
            } else {
                vcb.filters(FILTERfilters);
            }
            if (!INFOfilters.isEmpty()) {
                vcb.attribute(infoHeaderLine.getID(), new ArrayList<>(INFOfilters));
            }
            out.add(vcb.make());
        }
    }
    out.close();
    return 0;
}
Also used : VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFConstants(htsjdk.variant.vcf.VCFConstants) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) HashSet(java.util.HashSet)

Example 30 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class CaseControlPlot method doWork.

@Override
public int doWork(final List<String> args) {
    ArchiveFactory archiveFactory = null;
    VCFIterator in = null;
    VariantContextWriter teeVariantWriter = null;
    final List<CaseControlExtractor> excractors = new ArrayList<>();
    try {
        in = super.openVCFIterator(oneFileOrNull(args));
        final VCFHeader header = in.getHeader();
        excractors.addAll(parseConfigFile(header));
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty()) {
            LOG.error("No pedigree defined , or it is empty");
            return -1;
        }
        final Set<Pedigree.Person> casepersons = pedigree.getPersons().stream().filter(F -> F.isAffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (casepersons.isEmpty()) {
            LOG.error("No Affected individuals in pedigree/header");
            return -1;
        }
        final Set<Pedigree.Person> controlpersons = pedigree.getPersons().stream().filter(F -> F.isUnaffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (controlpersons.isEmpty()) {
            LOG.error("No Unaffected individuals in pedigree/header");
            return -1;
        }
        if (this.teeToStdout) {
            teeVariantWriter = super.openVariantContextWriter(null);
            teeVariantWriter.writeHeader(header);
        }
        archiveFactory = ArchiveFactory.open(this.outputDirOrZip);
        for (final CaseControlExtractor extractor : excractors) {
            extractor.pw = archiveFactory.openWriter(extractor.name);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            if (teeVariantWriter != null)
                teeVariantWriter.add(ctx);
            for (final CaseControlExtractor handler : excractors) {
                handler.visit(ctx, casepersons, controlpersons);
            }
        }
        for (final CaseControlExtractor handler : excractors) {
            handler.close();
        }
        progress.finish();
        if (teeVariantWriter != null) {
            teeVariantWriter.close();
            teeVariantWriter = null;
        }
        in.close();
        in = null;
        archiveFactory.close();
        archiveFactory = null;
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(archiveFactory);
        CloserUtil.close(teeVariantWriter);
        for (final CaseControlExtractor handler : excractors) {
            handler.close();
        }
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) Document(org.w3c.dom.Document) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ScriptException(javax.script.ScriptException) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) AFExtractorFactory(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory) SimpleBindings(javax.script.SimpleBindings) List(java.util.List) Element(org.w3c.dom.Element) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) AFExtractor(com.github.lindenb.jvarkit.util.vcf.AFExtractorFactory.AFExtractor) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) ScriptException(javax.script.ScriptException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator)

Aggregations

VCFIterator (htsjdk.variant.vcf.VCFIterator)98 VariantContext (htsjdk.variant.variantcontext.VariantContext)83 VCFHeader (htsjdk.variant.vcf.VCFHeader)76 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)62 Parameter (com.beust.jcommander.Parameter)56 Program (com.github.lindenb.jvarkit.util.jcommander.Program)56 Logger (com.github.lindenb.jvarkit.util.log.Logger)56 ArrayList (java.util.ArrayList)52 List (java.util.List)52 Set (java.util.Set)49 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)46 Collectors (java.util.stream.Collectors)45 HashSet (java.util.HashSet)42 Path (java.nio.file.Path)39 IOException (java.io.IOException)37 Allele (htsjdk.variant.variantcontext.Allele)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Genotype (htsjdk.variant.variantcontext.Genotype)33 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)32 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)32