Search in sources :

Example 11 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfGeneOntology method executeThings.

public int executeThings(List<String> args) {
    VcfIterator vcfIn = null;
    try {
        vcfIn = super.openVcfIterator(oneFileOrNull(args));
        this.filterVcfIterator(vcfIn);
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcfIn);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException)

Example 12 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfCompareCallersOneSample method doWork.

@Override
public int doWork(List<String> args) {
    File inputFile = null;
    List<EqualRangeVcfIterator> listChallengers = new ArrayList<>();
    VariantContextWriter vcw = null;
    VcfIterator in = null;
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        VCFHeader header = in.getHeader();
        if (header.getNGenotypeSamples() != 1) {
            LOG.error("vcf.must.have.only.one.sample");
            return -1;
        }
        VCFHeader h2 = new VCFHeader(header);
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            LOG.error("no.dict.in.vcf");
            return -1;
        }
        Comparator<VariantContext> ctxComparator = VCFUtils.createTidPosComparator(dict);
        /* load files to be challenged */
        for (File cf : this.challengerVcf) {
            // do not challenge vs itself
            if (inputFile != null && inputFile.equals(cf)) {
                LOG.error("Ignoring challenger (self): " + cf);
                continue;
            }
            VcfIterator cin = VCFUtils.createVcfIteratorFromFile(cf);
            VCFHeader ch = cin.getHeader();
            if (ch.getNGenotypeSamples() != 1) {
                LOG.warning("vcf.must.have.only.one.sample");
                cin.close();
                continue;
            }
            if (!header.getSampleNamesInOrder().get(0).equals(ch.getSampleNamesInOrder().get(0))) {
                LOG.warning("Ignoring " + cf + " because not the same sample.");
                cin.close();
                continue;
            }
            SAMSequenceDictionary hdict = ch.getSequenceDictionary();
            if (hdict == null || !SequenceUtil.areSequenceDictionariesEqual(dict, hdict)) {
                LOG.error("not.the.same.sequence.dictionaries");
                return -1;
            }
            listChallengers.add(new EqualRangeVcfIterator(cin, ctxComparator));
        }
        vcw = super.openVariantContextWriter(outputFile);
        vcw.writeHeader(h2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VariantContext prev_ctx = null;
        while (in.hasNext() && !vcw.checkError()) {
            VariantContext ctx = progress.watch(in.next());
            // check input order
            if (prev_ctx != null && ctxComparator.compare(prev_ctx, ctx) > 0) {
                LOG.error("bad sort order : got\n\t" + prev_ctx + "\nbefore\n\t" + ctx + "\n");
                return -1;
            }
            prev_ctx = ctx;
            int countInOtherFiles = 0;
            for (EqualRangeVcfIterator citer : listChallengers) {
                boolean foundInThatFile = false;
                List<VariantContext> ctxChallenging = citer.next(ctx);
                for (VariantContext ctx2 : ctxChallenging) {
                    if (!ctx2.getReference().equals(ctx.getReference()))
                        continue;
                    boolean ok = true;
                    if (!this.ignoreAlternate) {
                        Set<Allele> myAlt = new HashSet<Allele>(ctx.getAlternateAlleles());
                        myAlt.removeAll(ctx2.getAlternateAlleles());
                        if (!myAlt.isEmpty())
                            ok = false;
                    }
                    if (ok) {
                        foundInThatFile = true;
                        break;
                    }
                }
                countInOtherFiles += (foundInThatFile ? 1 : 0);
            }
            if (countInOtherFiles >= minCountInclusive && countInOtherFiles <= maxCountInclusive) {
                vcw.add(ctx);
            }
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcw);
        CloserUtil.close(listChallengers);
        CloserUtil.close(in);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 13 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfIn method doWork.

@Override
public int doWork(final List<String> args) {
    if (!this.filterIn.isEmpty() && !this.filterOut.isEmpty()) {
        LOG.error("Option filterIn/filterOut both defined.");
        return -1;
    }
    if (this.inverse && (!this.filterIn.isEmpty() || !this.filterOut.isEmpty())) {
        LOG.error("Option inverse cannot be used when Option filterin/filterou is defined.");
        return -1;
    }
    String databaseVcfUri;
    String userVcfUri;
    if (args.size() == 1) {
        databaseVcfUri = args.get(0);
        userVcfUri = null;
    } else if (args.size() == 2) {
        databaseVcfUri = args.get(0);
        userVcfUri = args.get(1);
    } else {
        LOG.error("illegal number of arguments");
        return -1;
    }
    VariantContextWriter w = null;
    VcfIterator in = null;
    try {
        in = (userVcfUri == null ? VCFUtils.createVcfIteratorFromInputStream(stdin()) : VCFUtils.createVcfIterator(userVcfUri));
        w = super.openVariantContextWriter(outputFile);
        if (this.databaseIsIndexed) {
            return this.scanUsingTabix(w, databaseVcfUri, in);
        } else {
            return this.scanFileSorted(w, databaseVcfUri, in);
        }
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(w);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException)

Example 14 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class FindCorruptedFiles method testVcf.

private void testVcf(File f, InputStream in) throws IOException, TribbleException {
    long n = 0;
    VcfIterator iter = new VcfIteratorImpl(in);
    iter.getHeader();
    while (iter.hasNext() && (NUM < 0 || n < NUM)) {
        iter.next();
        ++n;
    }
    if (n == 0) {
        emptyFile(f);
    }
    iter.close();
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VcfIteratorImpl(com.github.lindenb.jvarkit.util.vcf.VcfIteratorImpl)

Example 15 with VcfIterator

use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.

the class VcfLiftOver method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VariantContextWriter failed = null;
    GenomicSequence genomicSequence = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
            if (!(V instanceof VCFInfoHeaderLine))
                return true;
            final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
            if (removeInfo.contains(vih.getID()))
                return false;
            return true;
        }).collect(Collectors.toSet());
        if (this.failedFile != null) {
            final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
            header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
            failed = super.openVariantContextWriter(failedFile);
            failed.writeHeader(header2);
        }
        final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
        header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
        out.writeHeader(header3);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext()) {
            VariantContext ctx = progress.watch(in.next());
            if (!this.removeInfo.isEmpty()) {
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
                ctx = vcb.make();
            }
            if (ctx.isIndel() && this.ignoreIndels) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
                continue;
            }
            if (adaptivematch) {
                double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
                double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
                this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
            }
            final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
            false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
            if (lifted == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
            } else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
            } else {
                boolean alleleAreValidatedVsRef = true;
                // part of the code was copied from picard/liftovervcf
                final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
                final List<Allele> alleles = new ArrayList<Allele>();
                for (final Allele oldAllele : ctx.getAlleles()) {
                    final Allele fixedAllele;
                    if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
                        alleles.add(oldAllele);
                        continue;
                    } else if (lifted.isPositiveStrand()) {
                        fixedAllele = oldAllele;
                        alleles.add(oldAllele);
                    } else {
                        fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
                        alleles.add(fixedAllele);
                        reverseComplementAlleleMap.put(oldAllele, fixedAllele);
                    }
                    if (this.checkAlleleSequence) {
                        if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
                            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
                        }
                        final String alleleStr = fixedAllele.getBaseString();
                        int x = 0;
                        while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
                            final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
                            if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
                                alleleAreValidatedVsRef = false;
                                break;
                            }
                            ++x;
                        }
                        if (x != alleleStr.length()) {
                            alleleAreValidatedVsRef = false;
                            break;
                        }
                    }
                }
                if (!alleleAreValidatedVsRef) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
                    continue;
                }
                if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
                    continue;
                }
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
                vcb.id(ctx.getID());
                vcb.attributes(ctx.getAttributes());
                vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
                vcb.filters(ctx.getFilters());
                vcb.log10PError(ctx.getLog10PError());
                final GenotypesContext genotypeContext = ctx.getGenotypes();
                final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
                for (final Genotype genotype : genotypeContext) {
                    final List<Allele> fixedAlleles = new ArrayList<Allele>();
                    for (final Allele allele : genotype.getAlleles()) {
                        final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
                        fixedAlleles.add(fixedAllele);
                    }
                    fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
                }
                vcb.genotypes(fixedGenotypes);
                out.add(vcb.make());
            }
        }
        if (failed != null) {
            failed.close();
            failed = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(failed);
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LiftOver(htsjdk.samtools.liftover.LiftOver) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) List(java.util.List) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenotypesContext(htsjdk.variant.variantcontext.GenotypesContext) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) Map(java.util.Map) Interval(htsjdk.samtools.util.Interval)

Aggregations

VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)55 VariantContext (htsjdk.variant.variantcontext.VariantContext)39 VCFHeader (htsjdk.variant.vcf.VCFHeader)35 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)30 ArrayList (java.util.ArrayList)28 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)26 IOException (java.io.IOException)24 File (java.io.File)22 HashSet (java.util.HashSet)19 List (java.util.List)19 Genotype (htsjdk.variant.variantcontext.Genotype)18 Parameter (com.beust.jcommander.Parameter)17 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)17 Program (com.github.lindenb.jvarkit.util.jcommander.Program)17 Logger (com.github.lindenb.jvarkit.util.log.Logger)17 Set (java.util.Set)17 Allele (htsjdk.variant.variantcontext.Allele)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)15 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)14