Search in sources :

Example 1 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfCompareCallersOneSample method doWork.

@Override
public int doWork(final List<String> args) {
    File inputFile = null;
    final List<EqualRangeVcfIterator> listChallengers = new ArrayList<>();
    VariantContextWriter vcw = null;
    VCFIterator in = null;
    try {
        in = super.openVCFIterator(oneFileOrNull(args));
        VCFHeader header = in.getHeader();
        if (header.getNGenotypeSamples() != 1) {
            LOG.error("vcf must have only one sample");
            return -1;
        }
        final VCFHeader h2 = new VCFHeader(header);
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        JVarkitVersion.getInstance().addMetaData(getClass().getSimpleName(), h2);
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input"));
            return -1;
        }
        final Comparator<VariantContext> ctxComparator = VCFUtils.createTidPosComparator(dict);
        /* load files to be challenged */
        for (final File cf : this.challengerVcf) {
            // do not challenge vs itself
            if (inputFile != null && inputFile.equals(cf)) {
                LOG.error("Ignoring challenger (self): " + cf);
                continue;
            }
            VCFIterator cin = VCFUtils.createVCFIteratorFromFile(cf);
            VCFHeader ch = cin.getHeader();
            if (ch.getNGenotypeSamples() != 1) {
                LOG.warning("vcf.must.have.only.one.sample");
                cin.close();
                continue;
            }
            if (!header.getSampleNamesInOrder().get(0).equals(ch.getSampleNamesInOrder().get(0))) {
                LOG.warning("Ignoring " + cf + " because not the same sample.");
                cin.close();
                continue;
            }
            SAMSequenceDictionary hdict = ch.getSequenceDictionary();
            if (hdict == null || !SequenceUtil.areSequenceDictionariesEqual(dict, hdict)) {
                LOG.error("not.the.same.sequence.dictionaries");
                return -1;
            }
            listChallengers.add(new EqualRangeVcfIterator(cin, ctxComparator));
        }
        vcw = super.openVariantContextWriter(outputFile);
        vcw.writeHeader(h2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VariantContext prev_ctx = null;
        while (in.hasNext() && !vcw.checkError()) {
            VariantContext ctx = progress.watch(in.next());
            // check input order
            if (prev_ctx != null && ctxComparator.compare(prev_ctx, ctx) > 0) {
                LOG.error("bad sort order : got\n\t" + prev_ctx + "\nbefore\n\t" + ctx + "\n");
                return -1;
            }
            prev_ctx = ctx;
            int countInOtherFiles = 0;
            for (final EqualRangeVcfIterator citer : listChallengers) {
                boolean foundInThatFile = false;
                final List<VariantContext> ctxChallenging = citer.next(ctx);
                for (final VariantContext ctx2 : ctxChallenging) {
                    if (!ctx2.getReference().equals(ctx.getReference()))
                        continue;
                    boolean ok = true;
                    if (!this.ignoreAlternate) {
                        Set<Allele> myAlt = new HashSet<Allele>(ctx.getAlternateAlleles());
                        myAlt.removeAll(ctx2.getAlternateAlleles());
                        if (!myAlt.isEmpty())
                            ok = false;
                    }
                    if (ok) {
                        foundInThatFile = true;
                        break;
                    }
                }
                countInOtherFiles += (foundInThatFile ? 1 : 0);
            }
            if (countInOtherFiles >= minCountInclusive && countInOtherFiles <= maxCountInclusive) {
                vcw.add(ctx);
            }
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcw);
        CloserUtil.close(listChallengers);
        CloserUtil.close(in);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) VCFIterator(htsjdk.variant.vcf.VCFIterator) HashSet(java.util.HashSet)

Example 2 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class KnimeVariantHelper method processVcfMulti.

/**
 * process the VCF file,
 *
 * @param vcfIn input file name
 * @param fun functional
 * @return the output file name
 * @throws IOException
 */
public String processVcfMulti(final String vcfIn, final Function<VariantContext, List<VariantContext>> fun) throws IOException {
    this.lastVariantCount = 0;
    if (vcfIn == null) {
        final String msg = "Vcf Input URI/FIle is null.";
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
    }
    File outVcfFile = null;
    File outVcfIndexFile = null;
    final File STOP_FILE = new File(this.workfingDirectory, "STOP");
    if (STOP_FILE.exists()) {
        final String msg = "There is a stop file in " + STOP_FILE;
        LOG.error(msg);
        throw new IOException(msg);
    }
    boolean fail_flag = false;
    VCFIterator iter = null;
    VariantContextWriter variantContextWriter = null;
    try {
        IOUtil.assertDirectoryIsReadable(this.workfingDirectory);
        IOUtil.assertDirectoryIsWritable(this.workfingDirectory);
        if (!IOUtil.isUrl(vcfIn)) {
            IOUtil.assertFileIsReadable(new File(vcfIn));
        }
        final String extension;
        if (this.forceSuffix.equals(ForceSuffix.ForceTabix)) {
            extension = ".vcf.gz";
        } else if (this.forceSuffix.equals(ForceSuffix.ForceTribble)) {
            extension = ".vcf";
        } else if (vcfIn.endsWith(".gz")) {
            extension = ".vcf.gz";
        } else {
            extension = ".vcf";
        }
        final String filename = this.createOutputFile(vcfIn, extension);
        final String indexFilename;
        if (extension.endsWith(".gz")) {
            indexFilename = filename + FileExtensions.TRIBBLE_INDEX;
        } else {
            indexFilename = filename + FileExtensions.TABIX_INDEX;
        }
        outVcfFile = new File(filename);
        outVcfIndexFile = new File(indexFilename);
        LOG.info("opening " + vcfIn);
        iter = VCFUtils.createVCFIterator(vcfIn);
        super.init(iter.getHeader());
        final VCFHeader vcfHeader2;
        if (this.getExtraVcfHeaderLines().isEmpty()) {
            vcfHeader2 = iter.getHeader();
        } else {
            vcfHeader2 = new VCFHeader(iter.getHeader());
            for (final VCFHeaderLine extra : this.getExtraVcfHeaderLines()) {
                vcfHeader2.addMetaDataLine(extra);
            }
            // clear vcf header line now they 've been added to the header.
            this.getExtraVcfHeaderLines().clear();
        }
        final SAMSequenceDictionary dict = this.getHeader().getSequenceDictionary();
        if (dict == null) {
            final String msg = "There is no dictionary (##contig lines) in " + vcfIn + " but they are required.";
            LOG.error(msg);
            throw new IllegalArgumentException(msg);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        progress.setLogPrefix(this.filePrefix);
        LOG.info("writing " + outVcfFile + ". Emergency stop file is " + STOP_FILE);
        variantContextWriter = this.variantContextWriterBuilder.setOutputFile(outVcfFile).setReferenceDictionary(dict).build();
        long lastTick = System.currentTimeMillis();
        variantContextWriter.writeHeader(vcfHeader2);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final List<VariantContext> array = fun.apply(ctx);
            if (array != null) {
                for (final VariantContext ctx2 : array) {
                    variantContextWriter.add(ctx2);
                    this.lastVariantCount++;
                }
            }
            // check STOP File
            final long now = System.currentTimeMillis();
            if (// 10sec
            (now - lastTick) > 10 * 1000) {
                lastTick = now;
                if (STOP_FILE.exists()) {
                    LOG.warn("STOP FILE detected " + STOP_FILE + " Aborting.");
                    fail_flag = true;
                    break;
                }
            }
        }
        progress.finish();
        iter.close();
        iter = null;
        variantContextWriter.close();
        variantContextWriter = null;
        return outVcfFile.getPath();
    } catch (final Exception err) {
        fail_flag = true;
        LOG.error(err);
        throw new IOException(err);
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(variantContextWriter);
        if (fail_flag) {
            if (outVcfFile != null && outVcfFile.exists()) {
                LOG.warn("deleting " + outVcfFile);
                outVcfFile.delete();
            }
            if (outVcfIndexFile != null && outVcfIndexFile.exists()) {
                LOG.warn("deleting " + outVcfIndexFile);
                outVcfIndexFile.delete();
            }
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) VCFIterator(htsjdk.variant.vcf.VCFIterator)

Example 3 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class OnePassVcfLauncher method doWork.

@Override
public int doWork(final List<String> args) {
    VCFIterator in = null;
    VariantContextWriter vcw = null;
    final String input = super.oneFileOrNull(args);
    if (input != null && this.outputFile != null && !IOUtil.isUrl(input) && Paths.get(input).equals(this.outputFile)) {
        LOG.error("Input == output : " + input);
        return -1;
    }
    try {
        if (beforeVcf() != 0) {
            LOG.error("initialization failed");
            return -1;
        }
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
    try {
        final BcfIteratorBuilder bcb = new BcfIteratorBuilder();
        if (input == null) {
            in = bcb.open(stdin());
        } else {
            in = bcb.open(input);
        }
        if (getLogger() != null) {
            in = new VCFIter(in, getLogger());
        }
        vcw = this.writingVariantsDelegate.dictionary(in.getHeader()).open(this.outputFile);
        final int err = doVcfToVcf(input == null ? "<stdin>" : input, in, vcw);
        vcw.close();
        vcw = null;
        in.close();
        in = null;
        if (err != 0)
            deleteOutputOnError();
        return err;
    } catch (final Throwable err) {
        LOG.error(err);
        if (vcw != null)
            vcw.close();
        deleteOutputOnError();
        return -1;
    } finally {
        try {
            afterVcf();
        } catch (final Throwable err) {
            LOG.error(err);
        }
        if (in != null)
            in.close();
        if (vcw != null)
            vcw.close();
    }
}
Also used : VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFIterator(htsjdk.variant.vcf.VCFIterator) BcfIteratorBuilder(com.github.lindenb.jvarkit.variant.vcf.BcfIteratorBuilder)

Example 4 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfEvaiAnnot method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iterin, final VariantContextWriter out) {
    final VCFHeader header0 = iterin.getHeader();
    final VCFHeader header2 = new VCFHeader(header0);
    final Set<VCFHeaderLine> meta = new HashSet<>();
    /* evai fields */
    this.all_evai.stream().flatMap(T -> T.column2index.keySet().stream()).filter(T -> isEvaiBooleanField(T)).map(T -> new VCFInfoHeaderLine(EVAI_PFX + T, 1, VCFHeaderLineType.Integer, T)).forEach(H -> meta.add(H));
    meta.add(new VCFInfoHeaderLine(EVAI_PFX + "FINAL_CLASSIFICATION", 1, VCFHeaderLineType.String, "FINAL_CLASSIFICATION"));
    meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PVS1", 1, VCFHeaderLineType.Integer, "From intervar"));
    for (int i = 0; i < 5; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PS" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
    for (int i = 0; i < 7; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PM" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
    for (int i = 0; i < 6; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PP" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
    meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "BA1", 1, VCFHeaderLineType.Integer, "From intervar"));
    for (int i = 0; i < 5; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "BS" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
    for (int i = 0; i < 8; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "BP" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
    meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "_CLASSIFICATION", 1, VCFHeaderLineType.String, "FINAL_CLASSIFICATION"));
    meta.stream().forEach(M -> header2.addMetaDataLine(M));
    JVarkitVersion.getInstance().addMetaData(this, header2);
    final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header0).logger(LOG).build();
    out.writeHeader(header2);
    while (iterin.hasNext()) {
        final VariantContext ctx = progress.apply(iterin.next());
        if (this.ignore_filtered && ctx.isFiltered()) {
            out.add(ctx);
            continue;
        }
        final Map<String, Object> attributes = new HashMap<>();
        challengeEvai(attributes, ctx);
        challengeIntervar(attributes, ctx);
        if (attributes.isEmpty()) {
            out.add(ctx);
        } else {
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            for (final String k : attributes.keySet()) vcb.attribute(k, attributes.get(k));
            out.add(vcb.make());
        }
    }
    progress.close();
    return 0;
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CharSplitter(com.github.lindenb.jvarkit.lang.CharSplitter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) TabixReader(htsjdk.tribble.readers.TabixReader) Path(java.nio.file.Path) Locatable(htsjdk.samtools.util.Locatable) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) Logger(com.github.lindenb.jvarkit.util.log.Logger) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) Set(java.util.Set) IOException(java.io.IOException) JVarkitVersion(com.github.lindenb.jvarkit.util.JVarkitVersion) File(java.io.File) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) Closeable(java.io.Closeable) CoordMath(htsjdk.samtools.util.CoordMath) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ProgressFactory(com.github.lindenb.jvarkit.util.log.ProgressFactory) HashMap(java.util.HashMap) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 5 with VCFIterator

use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.

the class VcfEnsemblReg method annotate.

private void annotate(Track track, File inf, File outf) throws IOException {
    boolean contained = false;
    LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
    VCFIterator in = VCFUtils.createVCFIteratorFromFile(inf);
    VCFHeader header = in.getHeader();
    VCFInfoHeaderLine info = null;
    BBFileReader bigFile = new BBFileReader(track.url.toString());
    VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
    if (bigFile.isBigWigFile()) {
        info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
    } else {
        info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
    }
    header.addMetaDataLine(info);
    w1.writeHeader(in.getHeader());
    while (in.hasNext()) {
        VariantContext ctx = in.next();
        String chrom = ctx.getContig();
        if (!chrom.startsWith("chr"))
            chrom = "chr" + chrom;
        if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[12])")) {
            w1.add(ctx);
        } else if (bigFile.isBigWigFile()) {
            BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
            Float wigValue = null;
            while (iter != null && iter.hasNext() && wigValue == null) {
                WigItem item = iter.next();
                wigValue = item.getWigValue();
            }
            if (wigValue == null) {
                w1.add(ctx);
                continue;
            }
            VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(track.id, wigValue);
            w1.add(vcb.make());
        } else {
            BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
            Set<String> bedValues = new HashSet<String>();
            while (iter != null && iter.hasNext()) {
                BedFeature item = iter.next();
                String[] rest = item.getRestOfFields();
                if (rest == null || rest.length != 6) {
                    System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
                    continue;
                }
                String color = null;
                if (track.parent != null) {
                    if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
                        color = segway_17SegmentationSummaries(rest[5]);
                    } else if (track.parent.startsWith("ProjectedSegments")) {
                        color = projectedSegments(rest[5]);
                    } else if (track.parent.startsWith("RegBuildOverview")) {
                        color = regBuildOverview(rest[5]);
                    } else if (track.parent.startsWith("Segway_17CellSegments")) {
                        color = segway_17CellSegments(rest[5]);
                    } else {
                        System.err.println("Unknown parent:" + track.parent);
                    }
                }
                if (color == null)
                    continue;
                bedValues.add(rest[0] + "|" + color);
            }
            if (bedValues.isEmpty()) {
                w1.add(ctx);
                continue;
            }
            StringBuilder sb = new StringBuilder();
            for (String s : bedValues) {
                if (sb.length() != 0)
                    sb.append(",");
                sb.append(s);
            }
            VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(track.id, sb.toString());
            w1.add(vcb.make());
        }
    }
    in.close();
    w1.close();
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) VariantContext(htsjdk.variant.variantcontext.VariantContext) BigBedIterator(org.broad.igv.bbfile.BigBedIterator) BedFeature(org.broad.igv.bbfile.BedFeature) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) WigItem(org.broad.igv.bbfile.WigItem) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BBFileReader(org.broad.igv.bbfile.BBFileReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFIterator(htsjdk.variant.vcf.VCFIterator) BigWigIterator(org.broad.igv.bbfile.BigWigIterator)

Aggregations

VCFIterator (htsjdk.variant.vcf.VCFIterator)98 VariantContext (htsjdk.variant.variantcontext.VariantContext)83 VCFHeader (htsjdk.variant.vcf.VCFHeader)76 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)62 Parameter (com.beust.jcommander.Parameter)56 Program (com.github.lindenb.jvarkit.util.jcommander.Program)56 Logger (com.github.lindenb.jvarkit.util.log.Logger)56 ArrayList (java.util.ArrayList)52 List (java.util.List)52 Set (java.util.Set)49 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)46 Collectors (java.util.stream.Collectors)45 HashSet (java.util.HashSet)42 Path (java.nio.file.Path)39 IOException (java.io.IOException)37 Allele (htsjdk.variant.variantcontext.Allele)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Genotype (htsjdk.variant.variantcontext.Genotype)33 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)32 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)32