Search in sources :

Example 16 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

The following snippet is the method doVcfToVcf of the class VcfBurdenFisherV.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    /* let the component wrap the delegate writer with its own logic */
    final VariantContextWriter out = this.component.open(delegate);
    /* progress monitor bound to the dictionary of the input header */
    final SAMSequenceDictionaryProgress progress =
            new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    out.writeHeader(in.getHeader());
    /* copy every variant to the writer, reporting progress as we go */
    for (; in.hasNext(); ) {
        out.add(progress.watch(in.next()));
    }
    progress.finish();
    out.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Example 17 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

The following snippet is the method doVcfToVcf of the class VcfBurdenSplitter.

/**
 * Streams a VCF (from 'inputName' or stdin), assigns every variant to one or
 * more split-keys via the configured {@code Splitter}, buffers the
 * (key, vcf-line) pairs per contig in a disk-backed SortingCollection, and
 * when the contig changes (or at EOF) writes one concatenated mini-VCF per
 * key to the output stream. Groups whose variants are all FILTERed are
 * skipped (and optionally logged to 'allFilteredFileOut').
 *
 * @param inputName VCF URI, or null to read stdin
 * @param outorNull output file, or null for stdout
 * @return RETURN_OK on success, otherwise the value of wrapException(...)
 */
@Override
protected int doVcfToVcf(String inputName, File outorNull) {
    SortingCollection<KeyAndLine> sortingcollection = null;
    BufferedReader in = null;
    CloseableIterator<KeyAndLine> iter = null;
    PrintStream pw = null;
    PrintWriter allDiscardedLog = null;
    try {
        in = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
        // optional log of groups where every variant was filtered out
        if (this.allFilteredFileOut != null) {
            allDiscardedLog = IOUtils.openFileForPrintWriter(this.allFilteredFileOut);
        }
        // consume the header lines; 'cah' keeps both the parsed header and a line codec
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
        /**
         * find splitter by name
         */
        Splitter splitter = null;
        for (final Splitter s : this.splitters) {
            if (this.splitterName.equals(s.getName())) {
                splitter = s;
                break;
            }
        }
        if (splitter == null) {
            return wrapException("Cannot find a splitter named " + this.splitterName);
        }
        splitter.initialize(cah.header);
        LOG.info("splitter is " + splitter);
        pw = super.openFileOrStdoutAsPrintStream(outorNull);
        // read variants
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(cah.header);
        String prev_contig = null;
        for (; ; ) {
            final String line = in.readLine();
            // null variant <=> EOF; decoding also advances the progress monitor
            final VariantContext variant = (line == null ? null : progess.watch(cah.codec.decode(line)));
            // on EOF or contig change: dump everything buffered for the previous contig
            if (variant == null || !variant.getContig().equals(prev_contig)) {
                if (sortingcollection != null) {
                    sortingcollection.doneAdding();
                    iter = sortingcollection.iterator();
                    LOG.info("dumping data for CONTIG: \"" + prev_contig + "\"");
                    // group consecutive records sharing the same split-key
                    final EqualRangeIterator<KeyAndLine> eqiter = new EqualRangeIterator<>(iter, new Comparator<KeyAndLine>() {

                        @Override
                        public int compare(final KeyAndLine o1, final KeyAndLine o2) {
                            return o1.key.compareTo(o2.key);
                        }
                    });
                    while (eqiter.hasNext()) {
                        final List<KeyAndLine> buffer = eqiter.next();
                        final KeyAndLine first = buffer.get(0);
                        LOG.info(first.key);
                        final List<VariantContext> variants = new ArrayList<>(buffer.size());
                        boolean has_only_filtered = true;
                        for (final KeyAndLine kal : buffer) {
                            final VariantContext ctx = cah.codec.decode(kal.ctx);
                            variants.add(ctx);
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("Adding variant to list for key " + kal.key + " " + shortName(ctx));
                            }
                            // sanity check: everything buffered must belong to prev_contig
                            if (!ctx.getContig().equals(prev_contig)) {
                                eqiter.close();
                                return wrapException("illegal state");
                            }
                            if (!ctx.isFiltered() || this.acceptFiltered) {
                                has_only_filtered = false;
                            // do NOT break here: 'variants' must still collect every record of the group
                            }
                        }
                        // all ctx are filtered
                        if (has_only_filtered) {
                            LOG.warn("ALL IS FILTERED in " + first.key);
                            if (allDiscardedLog != null) {
                                for (final VariantContext ctx : variants) {
                                    if (isDebuggingVariant(ctx)) {
                                        LOG.info("Variant " + shortName(ctx) + " is part of never filtered for " + first.key);
                                    }
                                    allDiscardedLog.println(String.join("\t", first.key, ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().getDisplayString(), ctx.getAlternateAllele(0).getDisplayString(), String.valueOf(ctx.getFilters())));
                                }
                            }
                            continue;
                        }
                        // save vcf file: one mini-VCF per key, concatenated onto 'pw'
                        final VariantContextWriter out = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(pw));
                        final VCFHeader header2 = addMetaData(new VCFHeader(cah.header));
                        header2.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_SPLITKEY, first.key));
                        out.writeHeader(header2);
                        for (final VariantContext ctx : variants) {
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("saving variant " + shortName(ctx) + " to final output with key=" + first.key);
                            }
                            out.add(ctx);
                        }
                        // closing 'out' is safe: it wraps IOUtils.uncloseableOutputStream(pw), so 'pw' stays open
                        out.close();
                        pw.flush();
                    }
                    eqiter.close();
                    iter.close();
                    iter = null;
                    // dispose sorting collection
                    sortingcollection.cleanup();
                    sortingcollection = null;
                }
                // EOF met
                if (variant == null)
                    break;
                prev_contig = variant.getContig();
            }
            if (sortingcollection == null) {
                /* create sorting collection for new contig */
                sortingcollection = SortingCollection.newInstance(KeyAndLine.class, new KeyAndLineCodec(), new KeyAndLineComparator(), this.writingSortingCollection.maxRecordsInRam, this.writingSortingCollection.getTmpPaths());
                sortingcollection.setDestructiveIteration(true);
            }
            if (variant.getAlternateAlleles().size() != 1) {
                return wrapException("Expected only one allele per variant. Please use VcfMultiToOneAllele https://github.com/lindenb/jvarkit/wiki/VcfMultiToOneAllele.");
            }
            // no check for ctx.ifFiltered here, we do this later.
            for (final String key : splitter.keys(variant)) {
                if (isDebuggingVariant(variant)) {
                    LOG.info("Adding variant with key " + key + " " + shortName(variant));
                }
                sortingcollection.add(new KeyAndLine(key, line));
            }
        }
        progess.finish();
        pw.flush();
        pw.close();
        pw = null;
        if (allDiscardedLog != null) {
            allDiscardedLog.flush();
            allDiscardedLog.close();
            allDiscardedLog = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        return wrapException(err);
    } finally {
        // nulled-out resources were already closed on the happy path
        CloserUtil.close(iter);
        if (sortingcollection != null)
            sortingcollection.cleanup();
        CloserUtil.close(in);
        CloserUtil.close(pw);
        CloserUtil.close(allDiscardedLog);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) PrintStream(java.io.PrintStream) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader)

Example 18 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

The following snippet is the method doReadConcatenatedVcf of the class VcfDerby01.

/**
 * Reads one or more (possibly concatenated) VCF streams and loads them into
 * the Derby database: one row in table VCF per input VCF, de-duplicated
 * header/variant lines in ROWCONTENT (keyed by MD5), and join rows in VCFROW.
 * A "#ID\tNAME" mapping of inserted VCF ids is printed to the output file.
 *
 * Fix: the REF-allele truncation guard previously compared against a
 * hard-coded literal 50 while the warning message and the substring both
 * used MAX_REF_BASE_LENGTH; the guard now uses the same constant, so the
 * stored allele always fits VARCHAR(MAX_REF_BASE_LENGTH).
 *
 * @param args input VCF paths; empty means read stdin
 * @return RETURN_OK on success, -1 on error
 */
private int doReadConcatenatedVcf(List<String> args) {
    int number_of_ref_allele_truncated = 0;
    PreparedStatement pstmt = null;
    PreparedStatement pstmt2 = null;
    PreparedStatement pstmt3 = null;
    ResultSet row = null;
    PrintWriter pw = null;
    args = new ArrayList<>(IOUtils.unrollFiles(args));
    LOG.info(args.toString());
    LineIterator lineIter = null;
    // optional "##<tag>=" header line whose value overrides the stored VCF name
    final String titleHeaderTag = (this.titleHeaderStr == null || this.titleHeaderStr.trim().isEmpty() ? null : "##" + titleHeaderStr + "=");
    try {
        int fileidx = 0;
        pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        pw.println("#ID\tNAME");
        do {
            if (fileidx == 0 && args.isEmpty()) {
                lineIter = IOUtils.openStreamForLineIterator(stdin());
            } else {
                lineIter = IOUtils.openURIForLineIterator(args.get(fileidx));
            }
            int num_vcf_in_this_stream = 0;
            // one stream may contain several concatenated VCFs
            while (lineIter.hasNext()) {
                String filename = "vcf" + (++ID_GENERATOR);
                if (num_vcf_in_this_stream == 0 && !args.isEmpty()) {
                    filename = args.get(fileidx);
                }
                final List<String> headerLines = new ArrayList<>();
                while (lineIter.hasNext() && lineIter.peek().startsWith("#")) {
                    final String h = lineIter.next();
                    // drop any id/name header lines left over from a previous dump
                    if (h.startsWith(VCF_HEADER_FILE_ID) || h.startsWith(VCF_HEADER_FILE_NAME)) {
                        LOG.info("Ignoring line " + h);
                        continue;
                    }
                    /* find filename in vcf header */
                    if (titleHeaderTag != null && h.startsWith(titleHeaderTag) && h.trim().length() > titleHeaderTag.length()) {
                        filename = h.substring(titleHeaderTag.length()).trim();
                    }
                    headerLines.add(h);
                }
                final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(headerLines);
                // register the VCF itself and remember its generated id
                pstmt = this.conn.prepareStatement("INSERT INTO VCF(NAME) VALUES(?)", PreparedStatement.RETURN_GENERATED_KEYS);
                pstmt.setString(1, filename);
                if (pstmt.executeUpdate() != 1) {
                    LOG.error("Cannot insert VCF ?");
                    return -1;
                }
                final long vcf_id = getLastGeneratedId(pstmt);
                pstmt.close();
                pw.print(vcf_id);
                pw.print("\t");
                pw.println(filename);
                pw.flush();
                // pstmt: lookup by md5 / pstmt2: insert content / pstmt3: link content to this VCF
                pstmt = this.conn.prepareStatement("SELECT ID FROM ROWCONTENT WHERE MD5SUM=?");
                pstmt2 = this.conn.prepareStatement("INSERT INTO ROWCONTENT(MD5SUM,CONTENT,CONTIG,START,STOP,ALLELE_REF,FILTERED) VALUES (?,?,?,?,?,?,?)", PreparedStatement.RETURN_GENERATED_KEYS);
                pstmt3 = this.conn.prepareStatement("INSERT INTO VCFROW(VCF_ID,ROW_ID) VALUES (?,?)");
                pstmt3.setLong(1, vcf_id);
                final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(cah.header);
                /* insert VCF header lines */
                for (final String line : headerLines) {
                    final String md5 = this.toMd5.apply(line);
                    long content_id = -1L;
                    pstmt.setString(1, md5);
                    row = pstmt.executeQuery();
                    while (row.next()) {
                        content_id = row.getLong(1);
                    }
                    row.close();
                    /* vcf content was not found, create it */
                    if (content_id == -1L) {
                        pstmt2.setString(1, md5);
                        pstmt2.setString(2, line);
                        // header lines carry no genomic coordinates
                        pstmt2.setNull(3, Types.VARCHAR);
                        pstmt2.setNull(4, Types.INTEGER);
                        pstmt2.setNull(5, Types.INTEGER);
                        pstmt2.setNull(6, Types.VARCHAR);
                        pstmt2.setShort(7, (short) 1);
                        if (pstmt2.executeUpdate() != 1) {
                            LOG.error("Cannot insert ROWCONTENT ?");
                            return -1;
                        }
                        content_id = getLastGeneratedId(pstmt2);
                    }
                    /* insert new VCF row */
                    pstmt3.setLong(2, content_id);
                    if (pstmt3.executeUpdate() != 1) {
                        LOG.error("Cannot insert VCFROW ?");
                        return -1;
                    }
                }
                LOG.info("Inserted " + filename + " ID=" + vcf_id);
                while (lineIter.hasNext() && !lineIter.peek().startsWith("#")) {
                    final String line = lineIter.next();
                    final String md5 = this.toMd5.apply(line);
                    long content_id = -1L;
                    pstmt.setString(1, md5);
                    row = pstmt.executeQuery();
                    while (row.next()) {
                        content_id = row.getLong(1);
                    }
                    row.close();
                    /* vcf variants content was not found, create it */
                    if (content_id == -1L) {
                        /* decode to get chrom/start/end/ref */
                        final VariantContext ctx = progress.watch(cah.codec.decode(line));
                        pstmt2.setString(1, md5);
                        pstmt2.setString(2, line);
                        pstmt2.setString(3, ctx.getContig());
                        pstmt2.setInt(4, ctx.getStart());
                        pstmt2.setInt(5, ctx.getEnd());
                        String refBase = ctx.getReference().getBaseString();
                        /* sql table for Ref_allele is a varchar(MAX_REF_BASE_LENGTH) */
                        // was '> 50': the guard must match the column width used below
                        if (refBase.length() > MAX_REF_BASE_LENGTH) {
                            LOG.warn("Warning: TRUNCATING LARGE REF BASE TO FIT IN DATABASE : VARCHAR(" + MAX_REF_BASE_LENGTH + ") characters:" + refBase);
                            refBase = refBase.substring(0, MAX_REF_BASE_LENGTH);
                            ++number_of_ref_allele_truncated;
                        }
                        pstmt2.setString(6, refBase);
                        pstmt2.setShort(7, (short) (ctx.isFiltered() ? 1 : 0));
                        if (pstmt2.executeUpdate() != 1) {
                            LOG.error("Cannot insert ROWCONTENT ?");
                            return -1;
                        }
                        content_id = getLastGeneratedId(pstmt2);
                    }
                    /* insert new VCF row */
                    pstmt3.setLong(2, content_id);
                    if (pstmt3.executeUpdate() != 1) {
                        LOG.error("Cannot insert VCFROW ?");
                        return -1;
                    }
                }
                pstmt2.close();
                pstmt3.close();
                pstmt.close();
                progress.finish();
                num_vcf_in_this_stream++;
            }
            /* end of while iter has next */
            CloserUtil.close(lineIter);
            lineIter = null;
            fileidx++;
        } while (fileidx < args.size());
        pw.flush();
        pw.close();
        compress();
        LOG.warn("Number of REF alleles length(REF)> VARCHAR(" + MAX_REF_BASE_LENGTH + ") truncated:" + number_of_ref_allele_truncated);
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(pw);
        CloserUtil.close(row);
        CloserUtil.close(pstmt);
        CloserUtil.close(pstmt2);
        CloserUtil.close(pstmt3);
        CloserUtil.close(lineIter);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) PreparedStatement(java.sql.PreparedStatement) LineIterator(htsjdk.tribble.readers.LineIterator) SQLException(java.sql.SQLException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) ResultSet(java.sql.ResultSet) PrintWriter(java.io.PrintWriter)

Example 19 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

The following snippet is the method run of the class VcfDoest.

/**
 * Reads one or more VCFs from 'lr' and writes an R script to 'pw'.
 * For each VCF: prints a 'population' data.frame built from the pedigree in
 * the header, then for every transcript overlapping an informative variant,
 * prints transcript metadata, a 'variants' data.frame, a flat 'genotypes'
 * vector (0/1/2, -9 for unclassifiable), and optionally a call to a
 * user-defined R function. Variants are grouped per transcript via a
 * disk-backed SortingCollection.
 *
 * @param lr line iterator over the input VCF stream(s)
 * @param pw destination of the generated R script
 * @throws IOException if no pedigree is found or a pedigree individual is missing from the VCF
 */
private void run(final LineIterator lr, final PrintWriter pw) throws IOException {
    SortingCollection<TranscriptInfo> sorting = null;
    CloseableIterator<TranscriptInfo> iter2 = null;
    try {
        while (lr.hasNext()) {
            // NOTE(review): 'in' is never closed in this method — confirm whether the iterator owns resources
            VcfIterator in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
            final VCFHeader header = in.getHeader();
            final Pedigree pedigree = Pedigree.newParser().parse(header);
            if (pedigree.isEmpty()) {
                throw new IOException("No pedigree found in header VCF header. use VcfInjectPedigree to add it");
            }
            // keep only individuals with a known status (affected or unaffected)
            final SortedSet<Pedigree.Person> individuals = new TreeSet<>();
            for (final Pedigree.Person individual : pedigree.getPersons()) {
                if (individual.isAffected() || individual.isUnaffected()) {
                    individuals.add(individual);
                }
            }
            boolean first = true;
            pw.println("# samples ( 0: unaffected 1:affected)");
            pw.print("population <- data.frame(family=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getFamily().getId() + "\"");
                first = false;
            }
            pw.print("),name=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getId() + "\"");
                first = false;
            }
            pw.print("),status=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print(person.isUnaffected() ? 0 : 1);
                first = false;
            }
            pw.println("))");
            // (transcript, variant) records sorted so one transcript's rows are adjacent
            sorting = SortingCollection.newInstance(TranscriptInfo.class, new TranscriptInfoCodec(), new TranscriptInfoCmp(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            sorting.setDestructiveIteration(true);
            final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
            /**
             * loop over variants
             */
            while (in.hasNext() && !pw.checkError()) {
                final VariantContext ctx = progess.watch(in.next());
                if (ctx.isFiltered())
                    continue;
                if (ctx.getAlternateAlleles().isEmpty())
                    continue;
                final Allele altAllele = ctx.getAltAlleleWithHighestAlleleCount();
                final MafCalculator mafCalculator = new MafCalculator(altAllele, ctx.getContig());
                boolean genotyped = false;
                // require at least one status-known individual carrying altAllele
                for (final Pedigree.Person p : pedigree.getPersons()) {
                    if (!(p.isAffected() || p.isUnaffected()))
                        continue;
                    final Genotype g = ctx.getGenotype(p.getId());
                    if (g == null)
                        throw new IOException("Strange I cannot find individual " + p + " in the pedigree. Aborting.");
                    if (g.isCalled()) {
                        mafCalculator.add(g, p.isMale());
                    }
                    if (g.isHet() || g.isHomVar()) {
                        if (!g.getAlleles().contains(altAllele))
                            continue;
                        genotyped = true;
                        break;
                    }
                }
                if (!genotyped)
                    continue;
                final Interval interval = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd());
                final List<KnownGene> genes = this.overlap(interval);
                if (genes.isEmpty())
                    continue;
                // emit one TranscriptInfo per overlapping transcript
                for (final KnownGene kg : genes) {
                    final TranscriptInfo trInfo = new TranscriptInfo();
                    trInfo.contig = kg.getContig();
                    trInfo.txStart = kg.getTxStart();
                    trInfo.txEnd = kg.getTxEnd();
                    trInfo.transcriptName = kg.getName();
                    trInfo.strand = (byte) (kg.isPositiveStrand() ? '+' : '-');
                    trInfo.exonCount = kg.getExonCount();
                    trInfo.transcriptLength = kg.getTranscriptLength();
                    trInfo.ctxStart = ctx.getStart();
                    trInfo.ref = ctx.getReference();
                    trInfo.alt = altAllele;
                    trInfo.maf = mafCalculator.getMaf();
                    // encode each individual's genotype w.r.t. altAllele: 0 hom-ref, 1 het, 2 hom-var, -9 unknown
                    trInfo.genotypes = new byte[individuals.size()];
                    int idx = 0;
                    for (final Pedigree.Person individual : individuals) {
                        final Genotype genotype = ctx.getGenotype(individual.getId());
                        final byte b;
                        if (genotype.isHomRef()) {
                            b = 0;
                        } else if (genotype.isHomVar() && genotype.getAlleles().contains(altAllele)) {
                            b = 2;
                        } else if (genotype.isHet() && genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            b = 1;
                        } else /* we treat 0/2 as hom-ref */
                        if (genotype.isHet() && !genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else /* we treat 2/2 as hom-ref */
                        if (genotype.isHomVar() && !genotype.getAlleles().contains(altAllele)) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else {
                            b = -9;
                        }
                        trInfo.genotypes[idx] = b;
                        ++idx;
                    }
                    KnownGene archetype = kg;
                    /* find gene archetype = longest overlapping */
                    for (final KnownGene kg2 : genes) {
                        if (kg2 == kg)
                            continue;
                        if (archetype.getStrand().equals(kg2.getStrand()) && archetype.getTranscriptLength() < kg2.getTranscriptLength()) {
                            archetype = kg2;
                        }
                    }
                    trInfo.archetypeName = archetype.getName();
                    trInfo.archetypeLength = archetype.getTranscriptLength();
                    boolean ctxWasFoundInExon = false;
                    final int ctxPos0 = ctx.getStart() - 1;
                    // 0-based offset of the variant within the spliced transcript
                    int indexInTranscript0 = 0;
                    for (final KnownGene.Exon exon : kg.getExons()) {
                        // variant in exon ?
                        if (!(exon.getStart() > (ctx.getEnd() - 1) || (ctx.getStart() - 1) >= exon.getEnd())) {
                            ctxWasFoundInExon = true;
                            indexInTranscript0 += (ctxPos0 - exon.getStart());
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon.getName();
                            sorting.add(trInfo);
                            break;
                        } else {
                            // exon entirely upstream: accumulate its spliced length
                            indexInTranscript0 += (exon.getEnd() - exon.getStart());
                        }
                    }
                    if (ctxWasFoundInExon) {
                        continue;
                    }
                    indexInTranscript0 = 0;
                    // search closest intron/exon junction
                    for (int ex = 0; ex + 1 < kg.getExonCount(); ++ex) {
                        final KnownGene.Exon exon1 = kg.getExon(ex);
                        indexInTranscript0 += (exon1.getEnd() - exon1.getStart());
                        final KnownGene.Exon exon2 = kg.getExon(ex + 1);
                        if (exon1.getEnd() <= ctxPos0 && ctxPos0 < exon2.getStart()) {
                            final int dist_to_exon1 = ctxPos0 - exon1.getEnd();
                            final int dist_to_exon2 = exon2.getStart() - ctxPos0;
                            // snap the intronic variant to the nearer junction
                            if (dist_to_exon2 < dist_to_exon1) {
                                indexInTranscript0++;
                            }
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon1.getNextIntron().getName();
                            sorting.add(trInfo);
                            break;
                        }
                    }
                }
            // end loop over genes
            }
            // end while loop over variants
            progess.finish();
            sorting.doneAdding();
            LOG.info("done adding");
            iter2 = sorting.iterator();
            // group the sorted records by (contig, transcriptName)
            final EqualRangeIterator<TranscriptInfo> eqiter = new EqualRangeIterator<TranscriptInfo>(iter2, new Comparator<TranscriptInfo>() {

                @Override
                public int compare(final TranscriptInfo o1, final TranscriptInfo o2) {
                    int i = o1.contig.compareTo(o2.contig);
                    if (i != 0)
                        return i;
                    i = o1.transcriptName.compareTo(o2.transcriptName);
                    return i;
                }
            });
            while (eqiter.hasNext()) {
                final List<TranscriptInfo> list = eqiter.next();
                final TranscriptInfo front = list.get(0);
                pw.println("# BEGIN TRANSCRIPT " + front.transcriptName + " ##########################################");
                pw.println("transcript.chrom <- \"" + front.contig + "\"");
                pw.println("transcript.txStart0 <- " + front.txStart + "");
                pw.println("transcript.txEnd0 <- " + front.txEnd + "");
                pw.println("transcript.name <- \"" + front.transcriptName + "\"");
                pw.println("transcript.strand <- \"" + ((char) front.strand) + "\"");
                pw.println("transcript.length <- " + front.transcriptLength + "");
                pw.println("transcript.exonCount <- " + front.exonCount + "");
                pw.println("archetype.name <- \"" + front.archetypeName + "\"");
                pw.println("archetype.length <- " + front.archetypeLength + "");
                pw.print("variants <- data.frame(chrom=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.contig + "\"");
                    first = false;
                }
                pw.print("),chromStart=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart);
                    first = false;
                }
                pw.print("),chromEnd=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart + v.ref.length() - 1);
                    first = false;
                }
                pw.print("),refAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.ref.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),altAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.alt.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),positionInTranscript1=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.indexInTranscript0 + 1);
                    first = false;
                }
                pw.print("),maf=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.maf);
                    first = false;
                }
                pw.print("),overlapName=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.overlapName + "\"");
                    first = false;
                }
                pw.println("))");
                pw.println("# genotypes as a list. Should be a multiple of length(samples).");
                pw.println("# 0 is homref (0/0), 1 is het (0/1), 2 is homvar (1/1)");
                pw.println("# if the variant contains another ALT allele: (0/2) and (2/2) are considered 0 (homref)");
                pw.print("genotypes <- c(");
                first = true;
                for (final TranscriptInfo tr : list) {
                    for (byte g : tr.genotypes) {
                        if (!first)
                            pw.print(",");
                        first = false;
                        pw.print((int) g);
                    }
                }
                pw.println(")");
                pw.println("stopifnot(NROW(variants) * NROW(population) == length(genotypes) )");
                if (this.userDefinedFunName == null || this.userDefinedFunName.trim().isEmpty()) {
                    pw.println("## WARNING not user-defined R function was defined");
                } else {
                    pw.println("# consumme data with user-defined R function ");
                    pw.println(this.userDefinedFunName + "()");
                }
                pw.println("# END TRANSCRIPT " + front.transcriptName + " ##########################################");
            }
            // end while eqiter
            eqiter.close();
            iter2.close();
            iter2 = null;
            sorting.cleanup();
            sorting = null;
        }
    } finally {
        CloserUtil.close(iter2);
        if (sorting != null)
            sorting.cleanup();
    }
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) Interval(htsjdk.samtools.util.Interval)

Example 20 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

The following snippet is the method doVcfToVcf of the class VcfFilterNotInPedigree.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter delegate) {
    // open the component-backed writer around the delegate
    final VariantContextWriter out = this.component.open(delegate);
    // progress logger driven by the input VCF header's dictionary
    final SAMSequenceDictionaryProgress watcher = new SAMSequenceDictionaryProgress(in.getHeader()).logger(LOG);
    out.writeHeader(in.getHeader());
    // forward each variant through the watcher into the writer
    while (in.hasNext()) {
        out.add(watcher.watch(in.next()));
    }
    watcher.finish();
    out.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) DelegateVariantContextWriter(com.github.lindenb.jvarkit.util.vcf.DelegateVariantContextWriter) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27