Search in sources :

Example 36 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfUcsc method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final String TAG;
    if (StringUtil.isBlank(this.infoTag)) {
        TAG = "UCSC_" + database.toUpperCase() + "_" + table.toUpperCase();
    } else {
        TAG = this.infoTag;
    }
    VCFHeader header = in.getHeader();
    final VCFHeader h2 = new VCFHeader(header);
    h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, database + "." + table));
    if (!StringUtil.isBlank(this.filterIn)) {
        h2.addMetaDataLine(new VCFFilterHeaderLine(this.filterIn, "Set by " + this.getClass().getName()));
    } else if (!StringUtil.isBlank(this.filterOut)) {
        h2.addMetaDataLine(new VCFFilterHeaderLine(this.filterOut, "Set by " + this.getClass().getName()));
    }
    h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
    h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
    out.writeHeader(h2);
    final Map<Integer, PreparedStatement> bin2pstmt = new HashMap<>();
    try {
        if (!this.has_bin_column) {
            bin2pstmt.put(0, createPreparedStatement(0));
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            int start0, end0;
            if (// mutation starts *after* the base
            ctx.isIndel()) {
                start0 = ctx.getStart();
                end0 = ctx.getEnd();
            } else {
                start0 = ctx.getStart() - 1;
                end0 = ctx.getEnd();
            }
            // extends left/right
            start0 = Math.max(0, start0 - this.extend_bases);
            end0 += this.extend_bases;
            final Set<String> atts = new HashSet<String>();
            if (this.has_bin_column) {
                final List<Integer> binList = reg2bins(start0, end0);
                PreparedStatement pstmt = bin2pstmt.get(binList.size());
                if (pstmt == null) {
                    LOG.debug("create prepared statemement for bin.size=" + binList.size() + "[" + start0 + ":" + end0 + "]");
                    pstmt = createPreparedStatement(binList.size());
                    bin2pstmt.put(binList.size(), pstmt);
                }
                initPstmt(pstmt, ctx.getContig(), start0, end0);
                for (int x = 0; x < binList.size(); ++x) {
                    pstmt.setInt(4 + x, binList.get(x));
                }
                select(atts, pstmt);
            } else {
                // already defined
                final PreparedStatement pstmt = bin2pstmt.get(0);
                initPstmt(pstmt, ctx.getContig(), start0, end0);
                select(atts, pstmt);
            }
            if (atts.isEmpty() && StringUtil.isBlank(this.filterIn) && StringUtil.isBlank(this.filterOut)) {
                out.add(ctx);
                continue;
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            if (!StringUtil.isBlank(this.filterIn) && !atts.isEmpty()) {
                vcb.filter(this.filterIn);
            } else if (!StringUtil.isBlank(this.filterOut) && atts.isEmpty()) {
                vcb.filter(this.filterOut);
            }
            if (!atts.isEmpty()) {
                vcb.attribute(TAG, atts.toArray());
            }
            out.add(vcb.make());
        }
        progress.finish();
        return 0;
    } catch (final SQLException err) {
        LOG.error(err);
        throw new RuntimeException("SQLError", err);
    } finally {
        for (final PreparedStatement pstmt : bin2pstmt.values()) {
            CloserUtil.close(pstmt);
        }
        bin2pstmt.clear();
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) SQLException(java.sql.SQLException) VariantContext(htsjdk.variant.variantcontext.VariantContext) PreparedStatement(java.sql.PreparedStatement) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 37 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfFilterJdk method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter delegate) {
    final CtxWriterFactory.CtxWriter out = this.component.open(delegate);
    out.writeHeader(iter.getHeader());
    out.filter_instance.userData.put("first.variant", Boolean.TRUE);
    out.filter_instance.userData.put("last.variant", Boolean.FALSE);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(iter.getHeader()).logger(LOG);
    while (iter.hasNext() && !out.checkError()) {
        out.add(progress.watch(iter.next()));
        out.filter_instance.userData.put("first.variant", Boolean.FALSE);
        out.filter_instance.userData.put("last.variant", !iter.hasNext());
        final Object stop = out.filter_instance.userData.get("STOP");
        if (Boolean.TRUE.equals(stop))
            break;
    }
    progress.finish();
    out.close();
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)

Example 38 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VCFFixIndels method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator r, VariantContextWriter w) {
    long nChanged = 0L;
    final String TAG = "INDELFIXED";
    final VCFHeader header = r.getHeader();
    final VCFHeader h2 = new VCFHeader(header);
    addMetaData(h2);
    h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, 1, VCFHeaderLineType.String, "Fix Indels for @SolenaLS (position|alleles...)"));
    w.writeHeader(h2);
    final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
    while (r.hasNext()) {
        boolean somethingChanged = false;
        VariantContext ctx = progress.watch(r.next());
        /* map old allele to new allele */
        Map<Allele, Allele> original2modified = new HashMap<Allele, Allele>();
        /* did we found a strange allele (symbolic, etc ) */
        boolean strange_allele_flag = false;
        for (final Allele a : ctx.getAlleles()) {
            original2modified.put(a, a);
            if (a.isSymbolic() || a.isNoCall()) {
                strange_allele_flag = true;
                break;
            }
        }
        if (strange_allele_flag || original2modified.size() < 2) /* at least 2 alleles: REF+ ALT */
        {
            w.add(ctx);
            continue;
        }
        /* record chromStart if we need to shift to the right */
        int chromStart = ctx.getStart();
        /* trim right then left  */
        for (int side = 0; side < 2; ++side) {
            boolean done = false;
            while (!done) {
                boolean ok_side = true;
                /* common nucleotide at end/start */
                Character targetChar = null;
                done = true;
                // scan side
                Set<Allele> keys = new HashSet<>(original2modified.keySet());
                for (Allele k : keys) {
                    Allele newAllele = original2modified.get(k);
                    if (newAllele.isSymbolic()) {
                        ok_side = false;
                        break;
                    }
                    String baseString = newAllele.getBaseString().trim().toUpperCase();
                    if (baseString.length() < 2) {
                        ok_side = false;
                        break;
                    }
                    /* first or last char or all sequences
						 * side==0 : right
						 * side==1 : left
						 * */
                    Character baseChar = (side == 0 ? baseString.charAt(baseString.length() - 1) : baseString.charAt(0));
                    if (targetChar == null) {
                        targetChar = baseChar;
                    } else if (!targetChar.equals(baseChar)) {
                        /* doesn't end with same nucleotide */
                        ok_side = false;
                        break;
                    }
                }
                /* ok we can shift all alleles */
                if (ok_side && targetChar != null) {
                    done = false;
                    somethingChanged = true;
                    for (Allele k : keys) {
                        Allele newAllele = original2modified.get(k);
                        String baseString = newAllele.getBaseString().trim().toUpperCase();
                        if (// remove last nucleotide
                        side == 0) {
                            newAllele = Allele.create(baseString.substring(0, baseString.length() - 1), newAllele.isReference());
                        } else {
                            newAllele = Allele.create(baseString.substring(1), newAllele.isReference());
                        }
                        original2modified.put(k, newAllele);
                    }
                    if (side == 1)
                        chromStart++;
                }
            }
        /* end of while done */
        }
        if (!somethingChanged) {
            w.add(ctx);
            continue;
        }
        final VariantContextBuilder b = new VariantContextBuilder(ctx);
        b.start(chromStart);
        Allele newRef = original2modified.get(ctx.getReference());
        b.stop(chromStart + newRef.getBaseString().length() - 1);
        b.alleles(original2modified.values());
        List<Genotype> genotypes = new ArrayList<>();
        for (final String sample : header.getSampleNamesInOrder()) {
            final Genotype g = ctx.getGenotype(sample);
            if (!g.isCalled()) {
                genotypes.add(g);
                continue;
            }
            final GenotypeBuilder gb = new GenotypeBuilder(g);
            final List<Allele> aL = new ArrayList<>();
            for (final Allele a : g.getAlleles()) {
                aL.add(original2modified.get(a));
            }
            gb.alleles(aL);
            genotypes.add(gb.make());
        }
        StringBuilder tagContent = new StringBuilder();
        tagContent.append(String.valueOf(ctx.getStart()));
        for (Allele a : ctx.getAlleles()) {
            tagContent.append("|");
            tagContent.append(a.toString());
        }
        b.attribute(TAG, tagContent.toString());
        b.genotypes(genotypes);
        w.add(b.make());
        ++nChanged;
        if (w.checkError())
            break;
    }
    progress.finish();
    LOG.info("indels changed:" + nChanged);
    return RETURN_OK;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 39 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfOffsetsIndexFactory method indexVcfFile.

/**
 * index a vcf file for its variant offsets
 */
public File indexVcfFile(final File vcfFile, final File indexFile) throws IOException {
    LOG.info("indexing " + vcfFile);
    IOUtil.assertFileIsReadable(vcfFile);
    DataOutputStream daos = null;
    BlockCompressedInputStream bgzin = null;
    AsciiLineReader ascii = null;
    VCFHeader header = null;
    final VCFCodec codec = new VCFCodec();
    SAMSequenceDictionaryProgress progress = null;
    try {
        daos = new DataOutputStream(new FileOutputStream(indexFile));
        daos.write(MAGIC);
        if (vcfFile.getName().endsWith(".vcf.gz")) {
            bgzin = new BlockCompressedInputStream(vcfFile);
            ascii = null;
        } else if (vcfFile.getName().endsWith(".vcf")) {
            bgzin = null;
            ascii = new AsciiLineReader(new FileInputStream(vcfFile));
        } else {
            throw new IllegalArgumentException("not a vcf.gz or vcf file: " + vcfFile);
        }
        final List<String> headerLines = new ArrayList<>();
        for (; ; ) {
            final long offset = (ascii == null ? bgzin.getPosition() : ascii.getPosition());
            final String line = (ascii == null ? bgzin.readLine() : ascii.readLine());
            if (line == null)
                break;
            if (line.startsWith("#")) {
                headerLines.add(line);
                if (line.startsWith("#CHROM")) {
                    codec.readHeader(new LineIterator() {

                        int i = 0;

                        @Override
                        public String next() {
                            final String s = headerLines.get(i);
                            i++;
                            return s;
                        }

                        @Override
                        public boolean hasNext() {
                            return i < headerLines.size();
                        }

                        @Override
                        public String peek() {
                            return i < headerLines.size() ? headerLines.get(i) : null;
                        }
                    });
                    header = VCFUtils.parseHeader(headerLines).header;
                    progress = new SAMSequenceDictionaryProgress(header);
                    progress.logger(this.logger == null ? LOG : this.logger);
                    progress.setLogPrefix("indexing");
                }
                continue;
            }
            if (progress == null) {
                throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
            }
            final VariantContext ctx = codec.decode(line);
            progress.watch(ctx);
            if (this.acceptVariant != null) {
                if (!acceptVariant.test(ctx))
                    continue;
            }
            daos.writeLong(offset);
        }
        if (progress == null) {
            throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
        }
        progress.finish();
        daos.flush();
        daos.close();
        return indexFile;
    } catch (final IOException err) {
        throw err;
    } finally {
        CloserUtil.close(ascii);
        CloserUtil.close(bgzin);
        CloserUtil.close(daos);
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) AsciiLineReader(htsjdk.tribble.readers.AsciiLineReader) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) LineIterator(htsjdk.tribble.readers.LineIterator) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream)

Example 40 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfCompareCallersOneSample method doWork.

@Override
public int doWork(List<String> args) {
    File inputFile = null;
    List<EqualRangeVcfIterator> listChallengers = new ArrayList<>();
    VariantContextWriter vcw = null;
    VcfIterator in = null;
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        VCFHeader header = in.getHeader();
        if (header.getNGenotypeSamples() != 1) {
            LOG.error("vcf.must.have.only.one.sample");
            return -1;
        }
        VCFHeader h2 = new VCFHeader(header);
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            LOG.error("no.dict.in.vcf");
            return -1;
        }
        Comparator<VariantContext> ctxComparator = VCFUtils.createTidPosComparator(dict);
        /* load files to be challenged */
        for (File cf : this.challengerVcf) {
            // do not challenge vs itself
            if (inputFile != null && inputFile.equals(cf)) {
                LOG.error("Ignoring challenger (self): " + cf);
                continue;
            }
            VcfIterator cin = VCFUtils.createVcfIteratorFromFile(cf);
            VCFHeader ch = cin.getHeader();
            if (ch.getNGenotypeSamples() != 1) {
                LOG.warning("vcf.must.have.only.one.sample");
                cin.close();
                continue;
            }
            if (!header.getSampleNamesInOrder().get(0).equals(ch.getSampleNamesInOrder().get(0))) {
                LOG.warning("Ignoring " + cf + " because not the same sample.");
                cin.close();
                continue;
            }
            SAMSequenceDictionary hdict = ch.getSequenceDictionary();
            if (hdict == null || !SequenceUtil.areSequenceDictionariesEqual(dict, hdict)) {
                LOG.error("not.the.same.sequence.dictionaries");
                return -1;
            }
            listChallengers.add(new EqualRangeVcfIterator(cin, ctxComparator));
        }
        vcw = super.openVariantContextWriter(outputFile);
        vcw.writeHeader(h2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VariantContext prev_ctx = null;
        while (in.hasNext() && !vcw.checkError()) {
            VariantContext ctx = progress.watch(in.next());
            // check input order
            if (prev_ctx != null && ctxComparator.compare(prev_ctx, ctx) > 0) {
                LOG.error("bad sort order : got\n\t" + prev_ctx + "\nbefore\n\t" + ctx + "\n");
                return -1;
            }
            prev_ctx = ctx;
            int countInOtherFiles = 0;
            for (EqualRangeVcfIterator citer : listChallengers) {
                boolean foundInThatFile = false;
                List<VariantContext> ctxChallenging = citer.next(ctx);
                for (VariantContext ctx2 : ctxChallenging) {
                    if (!ctx2.getReference().equals(ctx.getReference()))
                        continue;
                    boolean ok = true;
                    if (!this.ignoreAlternate) {
                        Set<Allele> myAlt = new HashSet<Allele>(ctx.getAlternateAlleles());
                        myAlt.removeAll(ctx2.getAlternateAlleles());
                        if (!myAlt.isEmpty())
                            ok = false;
                    }
                    if (ok) {
                        foundInThatFile = true;
                        break;
                    }
                }
                countInOtherFiles += (foundInThatFile ? 1 : 0);
            }
            if (countInOtherFiles >= minCountInclusive && countInOtherFiles <= maxCountInclusive) {
                vcw.add(ctx);
            }
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcw);
        CloserUtil.close(listChallengers);
        CloserUtil.close(in);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27