Search in sources :

Example 16 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfTrap method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
    final Map<String, Path> chromToFile = new HashMap<>();
    IOUtil.assertFileIsReadable(this.manifestFile);
    BufferedReader r = null;
    try {
        r = IOUtils.openPathForBufferedReading(this.manifestFile);
        r.lines().filter(L -> !L.trim().isEmpty()).filter(L -> !L.startsWith("#")).forEach(L -> {
            final int tab = L.indexOf('\t');
            if (tab == -1)
                throw new RuntimeIOException("tab missing in " + L);
            final String contig = L.substring(0, tab);
            if (chromToFile.containsKey(contig)) {
                throw new RuntimeIOException("duplicate contig " + L);
            }
            final Path indexFile = Paths.get(L.substring(tab + 1));
            IOUtil.assertFileIsReadable(indexFile);
            chromToFile.put(contig, indexFile);
        });
    } catch (final IOException err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
    final Function<String, IndexFile> getIndexFile = s -> {
        Path file = chromToFile.get(s);
        if (file == null && s.startsWith("chr"))
            file = chromToFile.get(s.substring(3));
        if (file == null && !s.startsWith("chr"))
            file = chromToFile.get("chr" + s);
        if (file == null)
            return null;
        try {
            return new IndexFile(s, file);
        } catch (final IOException err) {
            throw new RuntimeIOException(err);
        }
    };
    IndexFile current = null;
    final String ATT_MIN = this.ATT + "_MIN";
    final String ATT_MAX = this.ATT + "_MAX";
    final Set<String> contigs_not_found = new HashSet<>();
    final Comparator<TrapRecord> comparator = (A, B) -> {
        if (!A.getContig().equals(B.getContig()))
            throw new IllegalStateException("not the same contigs ???");
        return Integer.compare(A.getStart(), B.getStart());
    };
    final VCFHeader header = new VCFHeader(iter.getHeader());
    final VCFHeader header2 = new VCFHeader(header);
    header2.addMetaDataLine(new VCFInfoHeaderLine(this.ATT, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "TRAP Score:((ALT|GENE|SCORE) in Trap Database  http://trap-score.org/"));
    header2.addMetaDataLine(new VCFInfoHeaderLine(ATT_MIN, 1, VCFHeaderLineType.Float, "Min Score in Trap Database  http://trap-score.org/"));
    header2.addMetaDataLine(new VCFInfoHeaderLine(ATT_MAX, 1, VCFHeaderLineType.Float, "Max Score in Trap Database  http://trap-score.org/"));
    JVarkitVersion.getInstance().addMetaData(this, header2);
    final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(iter.getHeader()).logger(LOG).build();
    out.writeHeader(header2);
    while (iter.hasNext()) {
        final VariantContext var = progress.apply(iter.next());
        if (this.ignore_filtered && var.isFiltered()) {
            out.add(var);
            continue;
        }
        if (current == null || !current.contig.equals(var.getContig())) {
            if (current != null) {
                CloserUtil.close(current);
                current = null;
            }
            if (contigs_not_found.contains(var.getContig())) {
                out.add(var);
                continue;
            }
            current = getIndexFile.apply(var.getContig());
        }
        if (current == null) {
            LOG.warn("Not indexed in trap " + var.getContig());
            contigs_not_found.add(var.getContig());
            out.add(var);
            continue;
        }
        final Set<String> annotations = new HashSet<>();
        final Float[] min_score = new Float[] { null };
        final Float[] max_score = new Float[] { null };
        Algorithms.equal_range_stream(current, 0, current.size(), new TrapRecord() {

            @Override
            public int getStart() {
                return var.getStart();
            }

            @Override
            public int getEnd() {
                return var.getEnd();
            }

            @Override
            public String getContig() {
                return var.getContig();
            }

            @Override
            public String getChr() {
                return getContig();
            }

            @Override
            public float getScore() {
                return 0f;
            }

            @Override
            public char getRef() {
                return '\0';
            }

            @Override
            public String getGene() {
                return "";
            }

            @Override
            public char getAlt() {
                return '\0';
            }
        }, comparator, Function.identity()).filter(R -> var.getReference().equals(Allele.create((byte) R.getRef(), true))).filter(R -> var.getAlternateAlleles().stream().anyMatch(A -> A.equals(Allele.create((byte) R.getAlt(), false)))).forEach(R -> {
            annotations.add(String.join("|", String.valueOf(R.getAlt()), R.getGene(), String.format("%." + TrapIndexer.SCORE_STRLEN + "f", R.getScore())));
            if (min_score[0] == null || min_score[0].compareTo(R.getScore()) > 0) {
                min_score[0] = R.getScore();
            }
            if (max_score[0] == null || max_score[0].compareTo(R.getScore()) < 0) {
                max_score[0] = R.getScore();
            }
        });
        if (annotations.isEmpty()) {
            out.add(var);
            continue;
        }
        final VariantContextBuilder vcb = new VariantContextBuilder(var);
        vcb.attribute(this.ATT, new ArrayList<>(annotations));
        vcb.attribute(ATT_MIN, min_score[0]);
        vcb.attribute(ATT_MAX, max_score[0]);
        out.add(var);
    }
    out.close();
    progress.close();
    CloserUtil.close(current);
    return 0;
}
Also used : RandomAccessFile(java.io.RandomAccessFile) Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) AbstractList(java.util.AbstractList) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) File(java.io.File) List(java.util.List) Paths(java.nio.file.Paths) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Closeable(java.io.Closeable) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) Algorithms(com.github.lindenb.jvarkit.util.Algorithms) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HashMap(java.util.HashMap) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Path(java.nio.file.Path) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 17 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfToIntervals method doWork.

@Override
public int doWork(final List<String> args) {
    if (n_variants_per_interval >= 0 && distance_per_interval >= 0) {
        LOG.info("n-variants per interval and distance both defined");
        return -1;
    } else if (n_variants_per_interval < 0 && distance_per_interval < 0) {
        LOG.info("n-variants per interval or distance must be defined");
        return -1;
    }
    try {
        try (VCFIterator iter = super.openVCFIterator(oneFileOrNull(args))) {
            final VCFHeader header = iter.getHeader();
            final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                if (!this.force_bed_output) {
                    final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
                    final SAMFileHeader samHeader = new SAMFileHeader(dict);
                    samHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
                    for (final String sn : header.getSampleNamesInOrder()) {
                        final SAMReadGroupRecord rg = new SAMReadGroupRecord(sn);
                        rg.setSample(sn);
                        samHeader.addReadGroup(rg);
                    }
                    JVarkitVersion.getInstance().addMetaData(this, samHeader);
                    codec.encode(pw, samHeader);
                }
                while (iter.hasNext()) {
                    final VariantContext first = iter.next();
                    VariantContext last = first;
                    long n_variants = 1;
                    if (this.n_variants_per_interval > 0L) {
                        while (iter.hasNext() && n_variants < this.n_variants_per_interval) {
                            if (!first.contigsMatch(iter.peek())) {
                                break;
                            }
                            // consumme
                            last = iter.next();
                            n_variants++;
                        }
                    } else {
                        while (iter.hasNext()) {
                            final VariantContext curr = iter.peek();
                            if (!first.contigsMatch(curr)) {
                                break;
                            }
                            if (CoordMath.getLength(first.getStart(), curr.getEnd()) > this.distance_per_interval) {
                                break;
                            }
                            // consumme
                            last = iter.next();
                            n_variants++;
                        }
                    }
                    // next variant is just too close than the last one
                    while (this.min_distance >= 0 && iter.hasNext()) {
                        final VariantContext curr = iter.peek();
                        if (!last.withinDistanceOf(curr, this.min_distance))
                            break;
                        // consumme
                        last = iter.next();
                        n_variants++;
                    }
                    pw.print(first.getContig());
                    pw.print("\t");
                    pw.print(first.getStart() - (this.force_bed_output ? 1 : 0));
                    pw.print("\t");
                    pw.print(last.getEnd());
                    pw.print("\t");
                    pw.print(n_variants);
                    pw.print("\t");
                    pw.print(CoordMath.getLength(first.getStart(), last.getEnd()));
                    pw.println();
                }
                // end while iter
                pw.flush();
            }
        // end out
        }
        // end vcf iterator
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFIterator(htsjdk.variant.vcf.VCFIterator) PrintWriter(java.io.PrintWriter)

Example 18 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class Launcher method doVcfToVcf.

protected int doVcfToVcf(final String inputNameOrNull, final File outorNull) {
    VCFIterator iterin = null;
    VariantContextWriter w = null;
    try {
        iterin = openVCFIterator(inputNameOrNull);
        w = openVariantContextWriter(outorNull);
        int ret = doVcfToVcf(inputNameOrNull == null ? "<STDIN>" : inputNameOrNull, iterin, w);
        w.close();
        w = null;
        iterin.close();
        iterin = null;
        return ret;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iterin);
        CloserUtil.close(w);
    }
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFIterator(htsjdk.variant.vcf.VCFIterator) ParameterException(com.beust.jcommander.ParameterException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) IOException(java.io.IOException)

Example 19 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfLiftOver method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VCFIterator in, VariantContextWriter out) {
    VariantContextWriter failed = null;
    GenomicSequence genomicSequence = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
            if (!(V instanceof VCFInfoHeaderLine))
                return true;
            final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
            if (removeInfo.contains(vih.getID()))
                return false;
            return true;
        }).collect(Collectors.toSet());
        if (this.failedFile != null) {
            final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
            header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
            failed = super.writingVariantsDelegate.dictionary(header2).open(failedFile);
            failed.writeHeader(header2);
        }
        final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
        header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        JVarkitVersion.getInstance().addMetaData(this, header3);
        header3.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
        header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
        out.writeHeader(header3);
        while (in.hasNext()) {
            VariantContext ctx = in.next();
            if (!this.removeInfo.isEmpty()) {
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
                ctx = vcb.make();
            }
            if (ctx.isIndel() && this.ignoreIndels) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
                continue;
            }
            if (adaptivematch) {
                double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
                double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
                this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
            }
            final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
            false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
            if (lifted == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
            } else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
            } else {
                boolean alleleAreValidatedVsRef = true;
                // part of the code was copied from picard/liftovervcf
                final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
                final List<Allele> alleles = new ArrayList<Allele>();
                for (final Allele oldAllele : ctx.getAlleles()) {
                    final Allele fixedAllele;
                    if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
                        alleles.add(oldAllele);
                        continue;
                    } else if (lifted.isPositiveStrand()) {
                        fixedAllele = oldAllele;
                        alleles.add(oldAllele);
                    } else {
                        fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
                        alleles.add(fixedAllele);
                        reverseComplementAlleleMap.put(oldAllele, fixedAllele);
                    }
                    if (this.checkAlleleSequence) {
                        if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
                            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
                        }
                        final String alleleStr = fixedAllele.getBaseString();
                        int x = 0;
                        while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
                            final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
                            if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
                                alleleAreValidatedVsRef = false;
                                break;
                            }
                            ++x;
                        }
                        if (x != alleleStr.length()) {
                            alleleAreValidatedVsRef = false;
                            break;
                        }
                    }
                }
                if (!alleleAreValidatedVsRef) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
                    continue;
                }
                if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
                    continue;
                }
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
                vcb.id(ctx.getID());
                vcb.attributes(ctx.getAttributes());
                vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
                vcb.filters(ctx.getFilters());
                vcb.log10PError(ctx.getLog10PError());
                if (lifted.getStart() != lifted.getEnd()) {
                    vcb.attribute(VCFConstants.END_KEY, lifted.getEnd());
                }
                final GenotypesContext genotypeContext = ctx.getGenotypes();
                final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
                for (final Genotype genotype : genotypeContext) {
                    final List<Allele> fixedAlleles = new ArrayList<Allele>();
                    for (final Allele allele : genotype.getAlleles()) {
                        final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
                        fixedAlleles.add(fixedAllele);
                    }
                    fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
                }
                vcb.genotypes(fixedGenotypes);
                out.add(vcb.make());
            }
        }
        if (failed != null) {
            failed.close();
            failed = null;
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(failed);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) ReferenceSequenceFile(htsjdk.samtools.reference.ReferenceSequenceFile) HashSet(java.util.HashSet) LiftOver(htsjdk.samtools.liftover.LiftOver) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFConstants(htsjdk.variant.vcf.VCFConstants) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) Collectors(java.util.stream.Collectors) File(java.io.File) ReferenceSequenceFileFactory(htsjdk.samtools.reference.ReferenceSequenceFileFactory) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) Map(java.util.Map) Interval(htsjdk.samtools.util.Interval)

Example 20 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfFilterByLiftOver method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
    final ContigNameConverter anotherVcfCtgConverter;
    if (this.anotherVcfReader != null) {
        final SAMSequenceDictionary dict = this.anotherVcfReader.getHeader().getSequenceDictionary();
        if (dict != null) {
            anotherVcfCtgConverter = ContigNameConverter.fromOneDictionary(dict);
        } else {
            anotherVcfCtgConverter = ContigNameConverter.getIdentity();
        }
    } else {
        anotherVcfCtgConverter = ContigNameConverter.getIdentity();
    }
    final LiftOver liftOver = new LiftOver(this.liftOverFile);
    liftOver.setLiftOverMinMatch(this.userMinMatch);
    final VCFHeader header = in.getHeader();
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (!this.disableValidation && dict != null && !dict.isEmpty()) {
        liftOver.validateToSequences(dict);
    }
    final VCFHeader header2 = new VCFHeader(header);
    final VCFFilterHeaderLine filterLiftOverFailed = new VCFFilterHeaderLine("LIFTOVER_FAILED", "liftover failed " + this.liftOverFile);
    header2.addMetaDataLine(filterLiftOverFailed);
    final VCFFilterHeaderLine filterNoSameContig = new VCFFilterHeaderLine("LIFTOVER_OTHER_CTG", "Variant is mapped to another contig after liftover with " + this.liftOverFile);
    header2.addMetaDataLine(filterNoSameContig);
    final VCFInfoHeaderLine infoLiftOverPos = new VCFInfoHeaderLine("LIFTOVER_LOC", 1, VCFHeaderLineType.String, "Position of the variant liftover-ed with " + this.liftOverFile);
    header2.addMetaDataLine(infoLiftOverPos);
    final VCFFilterHeaderLine filterDistantFromPrev = new VCFFilterHeaderLine("LIFTOVER_DISTANT", "After liftover the distance (< " + min_distance + ") with the previous variant is unusual( > " + max_distance + ") after liftover with " + this.liftOverFile);
    header2.addMetaDataLine(filterDistantFromPrev);
    /* transfert filters to new header */
    if (this.anotherVcfReader != null) {
        this.anotherVcfReader.getHeader().getFilterLines().forEach(F -> header2.addMetaDataLine(F));
    }
    JVarkitVersion.getInstance().addMetaData(this, header2);
    out.writeHeader(header2);
    Locatable prevCtx = null;
    Locatable prevLifted = null;
    final Function<Locatable, String> interval2str = R -> R.getContig() + "|" + R.getStart() + "|" + R.getEnd();
    while (in.hasNext()) {
        final VariantContext ctx = in.next();
        if (prevCtx != null && !prevCtx.getContig().equals(ctx.getContig())) {
            prevCtx = null;
            prevLifted = null;
        }
        final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
        false, interval2str.apply(ctx)));
        // lifover failed
        if (lifted == null) {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.filter(filterLiftOverFailed.getID());
            out.add(vcb.make());
        } else // another contig
        if (lifted != null && !sameContig(lifted, ctx)) {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.filter(filterNoSameContig.getID());
            vcb.attribute(infoLiftOverPos.getID(), interval2str.apply(lifted));
            out.add(vcb.make());
        } else // strange distance
        if (prevCtx != null && lifted != null && prevLifted != null && prevCtx.getContig().equals(ctx.getContig()) && sameContig(prevLifted, lifted) && distance(prevCtx, ctx) < this.min_distance && distance(prevLifted, lifted) > this.max_distance) {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.filter(filterDistantFromPrev.getID());
            vcb.attribute(infoLiftOverPos.getID(), interval2str.apply(lifted));
            out.add(vcb.make());
        } else // filtered in anotherVcf
        if (this.anotherVcfReader != null) {
            final Set<String> found_filtered = new HashSet<>();
            final String ctg2 = anotherVcfCtgConverter.apply(lifted.getContig());
            if (!StringUtils.isBlank(ctg2)) {
                try (CloseableIterator<VariantContext> iter2 = this.anotherVcfReader.query(ctg2, lifted.getStart(), lifted.getEnd())) {
                    while (iter2.hasNext()) {
                        final VariantContext ctx2 = iter2.next();
                        if (!ctx2.isFiltered() || ctx2.getLengthOnReference() != ctx.getLengthOnReference())
                            continue;
                        found_filtered.addAll(ctx2.getFilters());
                        break;
                    }
                }
            }
            if (found_filtered.isEmpty()) {
                out.add(ctx);
            } else {
                // add previous
                found_filtered.addAll(ctx.getFilters());
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.filters(found_filtered);
                out.add(vcb.make());
            }
        } else {
            out.add(ctx);
        }
        prevCtx = ctx;
        prevLifted = lifted;
    }
    return 0;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) IOUtil(htsjdk.samtools.util.IOUtil) VCFHeader(htsjdk.variant.vcf.VCFHeader) Function(java.util.function.Function) HashSet(java.util.HashSet) LiftOver(htsjdk.samtools.liftover.LiftOver) Interval(htsjdk.samtools.util.Interval) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) Locatable(htsjdk.samtools.util.Locatable) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) File(java.io.File) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) OnePassVcfLauncher(com.github.lindenb.jvarkit.jcommander.OnePassVcfLauncher) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) LiftOver(htsjdk.samtools.liftover.LiftOver) CloseableIterator(htsjdk.samtools.util.CloseableIterator) HashSet(java.util.HashSet) Set(java.util.Set) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Locatable(htsjdk.samtools.util.Locatable) Interval(htsjdk.samtools.util.Interval)

Aggregations

VCFIterator (htsjdk.variant.vcf.VCFIterator)98 VariantContext (htsjdk.variant.variantcontext.VariantContext)83 VCFHeader (htsjdk.variant.vcf.VCFHeader)76 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)62 Parameter (com.beust.jcommander.Parameter)56 Program (com.github.lindenb.jvarkit.util.jcommander.Program)56 Logger (com.github.lindenb.jvarkit.util.log.Logger)56 ArrayList (java.util.ArrayList)52 List (java.util.List)52 Set (java.util.Set)49 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)46 Collectors (java.util.stream.Collectors)45 HashSet (java.util.HashSet)42 Path (java.nio.file.Path)39 IOException (java.io.IOException)37 Allele (htsjdk.variant.variantcontext.Allele)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Genotype (htsjdk.variant.variantcontext.Genotype)33 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)32 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)32