Search in sources :

Example 36 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfGeneEpistasis method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.geneBed == null) {
        LOG.error("gene file bed undefined");
        return -1;
    }
    if (this.outputFile == null) {
        LOG.error("output file undefined");
        return -1;
    }
    CloseableIterator<VariantContext> iter = null;
    try {
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        this.vcfFileReader = new VCFFileReader(vcfFile, true);
        final VCFHeader header = this.vcfFileReader.getFileHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty() || !pedigree.hasAffected() || !pedigree.hasUnaffected()) {
            LOG.error("empty ped or no case/ctrl");
            return -1;
        }
        pedigree.verifyPersonsHaveUniqueNames();
        for (final Pedigree.Person p : pedigree.getPersons().stream().filter(P -> P.isAffected() || P.isUnaffected()).filter(P -> header.getSampleNamesInOrder().contains(P.getId())).collect(Collectors.toSet())) {
            this.id2samples.put(p.getId(), p);
        }
        this.vcfTools = new VcfTools(header);
        List<Interval> geneList;
        if (!this.geneBed.exists()) {
            final Map<String, Interval> gene2interval = new HashMap<>(50000);
            LOG.info("building gene file" + this.geneBed);
            iter = this.vcfFileReader.iterator();
            // iter = this.vcfFileReader.query("chr3",1,300_000_000);
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
            while (iter.hasNext()) {
                final VariantContext ctx = progress.watch(iter.next());
                if (!accept(ctx))
                    continue;
                for (final String geneName : getGenes(ctx)) {
                    final Interval old = gene2interval.get(geneName);
                    if (old == null) {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, geneName));
                        LOG.info("adding " + geneName + ". number of genes: " + gene2interval.size());
                    } else if (!old.getContig().equals(ctx.getContig())) {
                        LOG.error("boum :" + geneName + ": on chrom " + ctx.getContig() + " vs " + old);
                        return -1;
                    } else {
                        gene2interval.put(geneName, new Interval(ctx.getContig(), Math.min(ctx.getStart(), old.getStart()), Math.max(ctx.getEnd(), old.getEnd()), false, geneName));
                    }
                }
            }
            iter.close();
            iter = null;
            progress.finish();
            geneList = new ArrayList<>(gene2interval.values());
            PrintWriter pw = new PrintWriter(this.geneBed);
            for (final Interval g : geneList) {
                pw.println(g.getContig() + "\t" + (g.getStart() - 1) + "\t" + (g.getEnd()) + "\t" + g.getName());
            }
            pw.flush();
            pw.close();
            pw = null;
        } else {
            BedLineCodec codec = new BedLineCodec();
            BufferedReader r = IOUtil.openFileForBufferedReading(geneBed);
            geneList = r.lines().map(L -> codec.decode(L)).filter(B -> B != null).map(B -> new Interval(B.getContig(), B.getStart(), B.getEnd(), true, B.get(3))).collect(Collectors.toList());
            r.close();
        }
        if (geneList.isEmpty()) {
            LOG.error("gene List is empty");
            return -1;
        }
        final Comparator<VariantContext> ctxSorter = VCFUtils.createTidPosRefComparator(header.getSequenceDictionary());
        final Function<Interval, List<VariantContext>> loadVariants = (R) -> {
            List<VariantContext> L = new ArrayList<>();
            CloseableIterator<VariantContext> r = this.vcfFileReader.query(R.getContig(), R.getStart(), R.getEnd());
            while (r.hasNext()) {
                final VariantContext ctx = r.next();
                if (!accept(ctx))
                    continue;
                if (!getGenes(ctx).contains(R.getName()))
                    continue;
                L.add(ctx);
            }
            r.close();
            return L;
        };
        final SkatExecutor executor = this.skatFactory.build();
        Double bestSkat = null;
        LOG.info("number of genes : " + geneList.size());
        final int list_end_index = (this.user_end_index < 0 ? geneList.size() : Math.min(geneList.size(), this.user_end_index));
        for (int x = this.user_begin_index; x < list_end_index; ++x) {
            final Interval intervalx = geneList.get(x);
            final List<VariantContext> variantsx = loadVariants.apply(intervalx);
            if (variantsx.isEmpty())
                continue;
            for (int y = x; y < geneList.size(); /* pas list_end_index */
            ++y) {
                final Interval intervaly;
                final List<VariantContext> merge;
                if (y == x) {
                    // we-re testing gene 1 only
                    intervaly = intervalx;
                    merge = variantsx;
                } else {
                    intervaly = geneList.get(y);
                    if (intervaly.intersects(intervalx))
                        continue;
                    final List<VariantContext> variantsy = loadVariants.apply(intervaly);
                    if (variantsy.isEmpty())
                        continue;
                    merge = new MergedList<>(variantsx, variantsy);
                }
                LOG.info("testing : [" + x + "]" + intervalx + " [" + y + "]" + intervaly + " N:" + geneList.size() + " best: " + bestSkat);
                final Double skat = eval(executor, merge);
                if (skat == null)
                    continue;
                if (bestSkat == null || skat.compareTo(bestSkat) < 0) {
                    bestSkat = skat;
                    LOG.info("best " + bestSkat + " " + intervalx + " " + intervaly);
                    if (this.outputFile.getName().endsWith(".vcf") || this.outputFile.getName().endsWith(".vcf.gz")) {
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfGeneEpistasis.class.getName(), intervalx.getName() + " " + intervaly.getName() + " " + bestSkat));
                        final VariantContextWriter w = VCFUtils.createVariantContextWriter(outputFile);
                        w.writeHeader(header2);
                        merge.stream().sorted(ctxSorter).forEach(V -> w.add(V));
                        w.close();
                    } else {
                        final PrintWriter w = super.openFileOrStdoutAsPrintWriter(outputFile);
                        w.println(String.valueOf(bestSkat) + "\t" + intervalx.getName() + "\t" + intervaly.getName());
                        w.flush();
                        w.close();
                    }
                }
            }
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(this.vcfFileReader);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) AbstractList(java.util.AbstractList) HashMap(java.util.HashMap) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) SkatResult(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatResult) StringUtil(htsjdk.samtools.util.StringUtil) SkatExecutor(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatExecutor) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SkatFactory(com.github.lindenb.jvarkit.tools.skat.SkatFactory) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) BufferedReader(java.io.BufferedReader) Comparator(java.util.Comparator) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) AbstractList(java.util.AbstractList) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) CloseableIterator(htsjdk.samtools.util.CloseableIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) BufferedReader(java.io.BufferedReader) SkatExecutor(com.github.lindenb.jvarkit.tools.skat.SkatFactory.SkatExecutor) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 37 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class HaloplexParasite method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    SamReader samReader = null;
    final List<Mutation> mutations = new ArrayList<>();
    try {
        final Set<File> bamFiles = Files.lines(this.bamList.toPath()).filter(T -> !(T.isEmpty() || T.startsWith("#"))).map(T -> new File(T)).collect(Collectors.toSet());
        final VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(in.getHeader().getSequenceDictionary());
        final VCFFilterHeaderLine filter = new VCFFilterHeaderLine("HALOPLEXPARASITE", "fails Parasite Haloplex Sequence");
        final VCFInfoHeaderLine infoWord = new VCFInfoHeaderLine(filter.getID(), 1, VCFHeaderLineType.String, "Parasite Haloplex Sequence (Word|Count|Fraction)");
        super.addMetaData(header);
        out.writeHeader(header);
        header.addMetaDataLine(filter);
        header.addMetaDataLine(infoWord);
        while (in.hasNext()) {
            final VariantContext ctx = in.next();
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, ctx.getContig(), ctx.getStart(), ctx.getEnd(), ctx.getAlleles());
            if (!(ctx.isIndel() || ctx.isMixed())) {
                out.add(vcb.make());
                continue;
            }
            if (!vcb.getAlleles().stream().filter(A -> !(A.isSymbolic() || A.isNoCall() || A.length() < this.minClipSize)).findAny().isPresent()) {
                out.add(vcb.make());
                continue;
            }
            final Mutation mut = new Mutation(ctx);
            mutations.add(mut);
        }
        final Counter<String> words = new Counter<>();
        for (final File bamFile : bamFiles) {
            LOG.info("Opening " + bamFile);
            samReader = createSamReaderFactory().open(bamFile);
            for (final Mutation mut : mutations) {
                // words seen in that mutation : don't use a Counter
                final Set<String> mutWords = new HashSet<>();
                /* loop over reads overlapping that mutation */
                final SAMRecordIterator sri = samReader.queryOverlapping(mut.contig, mut.start, mut.end);
                while (sri.hasNext()) {
                    final SAMRecord rec = sri.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                    if (rec.getDuplicateReadFlag())
                        continue;
                    if (rec.getReadFailsVendorQualityCheckFlag())
                        continue;
                    final Cigar cigar = rec.getCigar();
                    if (cigar.numCigarElements() == 1)
                        continue;
                    final byte[] bases = rec.getReadBases();
                    int refpos = rec.getUnclippedStart();
                    int readpos = 0;
                    /* scan cigar overlapping that mutation */
                    for (final CigarElement ce : cigar) {
                        final CigarOperator op = ce.getOperator();
                        final int ref_end = refpos + (op.consumesReferenceBases() || op.isClipping() ? ce.getLength() : 0);
                        final int read_end = readpos + (op.consumesReadBases() ? ce.getLength() : 0);
                        /* check clip is large enough */
                        if (op.equals(CigarOperator.S) && ce.getLength() >= this.minClipSize) {
                            /* check clip overlap mutation */
                            if (!(ref_end < mut.start || refpos > mut.end)) {
                                /* break read of soft clip into words */
                                for (int x = 0; x + this.minClipSize <= ce.getLength(); ++x) {
                                    final String substr = new String(bases, readpos + x, this.minClipSize);
                                    if (!substr.contains("N")) {
                                        final String revcomp = AcidNucleics.reverseComplement(substr);
                                        mutWords.add(substr);
                                        if (!revcomp.equals(substr))
                                            mutWords.add(revcomp);
                                    }
                                }
                            }
                        }
                        refpos = ref_end;
                        readpos = read_end;
                    }
                }
                sri.close();
                for (final String w : mutWords) {
                    words.incr(w);
                }
            }
            // end of for(mutations)
            samReader.close();
            samReader = null;
        }
        LOG.info("mutations:" + mutations.size());
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        for (final Mutation mut : mutations) {
            progress.watch(mut.contig, mut.start);
            final VariantContextBuilder vcb = new VariantContextBuilder(inputName, mut.contig, mut.start, mut.end, mut.alleles);
            String worstString = null;
            Double worstFraction = null;
            final double totalWords = words.getTotal();
            for (final Allele a : mut.alleles) {
                if (a.isSymbolic() || a.isNoCall() || a.length() < this.minClipSize)
                    continue;
                for (int x = 0; x + this.minClipSize <= a.length(); ++x) {
                    final String substr = new String(a.getBases(), x, this.minClipSize);
                    final long count = words.count(substr);
                    final double fraction = count / totalWords;
                    if (worstFraction == null || worstFraction < fraction) {
                        worstString = substr + "|" + count + "|" + fraction;
                        worstFraction = fraction;
                    }
                }
            }
            if (worstString != null) {
                vcb.attribute(infoWord.getID(), worstString);
            }
            if (worstFraction != null && worstFraction.doubleValue() >= this.tresholdFraction) {
                vcb.filter(filter.getID());
            }
            out.add(vcb.make());
        }
        progress.finish();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(samReader);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) AcidNucleics(com.github.lindenb.jvarkit.util.bio.AcidNucleics) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Files(java.nio.file.Files) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File)

Example 38 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class IgvReview method saveVariants.

private File saveVariants(Window owner, final File f) {
    if (f == null)
        return null;
    VariantContextWriterBuilder vcb = new VariantContextWriterBuilder();
    VariantContextWriter w = null;
    try {
        SAMSequenceDictionary dict = this.vcfHeader.getSequenceDictionary();
        if (dict != null) {
            vcb.setReferenceDictionary(dict);
        }
        vcb.setOutputFile(f);
        final VCFHeader header2 = new VCFHeader(this.vcfHeader);
        if (header2.getFormatHeaderLine(this.reviewFormat.getID()) == null) {
            header2.addMetaDataLine(this.reviewFormat);
        }
        w = vcb.build();
        w.writeHeader(header2);
        int x = 0;
        while (x < this.genotypeTable.getItems().size()) {
            VariantContext v1 = this.genotypeTable.getItems().get(x).ctx;
            List<Genotype> genotypes = new ArrayList<>();
            genotypes.add(this.genotypeTable.getItems().get(x).makeGenotype());
            int y = x + 1;
            while (y < this.genotypeTable.getItems().size()) {
                VariantContext v2 = this.genotypeTable.getItems().get(y).ctx;
                // yes '!=' is enough
                if (v2 != v1)
                    break;
                genotypes.add(this.genotypeTable.getItems().get(y).makeGenotype());
                y++;
            }
            VariantContextBuilder vb = new VariantContextBuilder(v1);
            vb.genotypes(genotypes);
            w.add(vb.make());
            x = y;
        }
        w.close();
        return f;
    } catch (final Exception err) {
        JfxUtils.dialog().cause(err).show(owner);
        return null;
    } finally {
        CloserUtil.close(w);
    }
}
Also used : VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BackingStoreException(java.util.prefs.BackingStoreException) IOException(java.io.IOException)

Example 39 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class Biostar251649 method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter w) {
    try {
        final VCFHeader header = new VCFHeader(in.getHeader());
        VCFInfoHeaderLine info5 = new VCFInfoHeaderLine(leftTag + extend, 1, VCFHeaderLineType.String, "Sequence on the 5' of mutation");
        VCFInfoHeaderLine info3 = new VCFInfoHeaderLine(rightTag + extend, 1, VCFHeaderLineType.String, "Sequence on the 3' of mutation");
        if (header.getInfoHeaderLine(info5.getID()) != null) {
            LOG.error("tag " + info5.getID() + " already present in VCF header");
            return -1;
        }
        if (header.getInfoHeaderLine(info3.getID()) != null) {
            LOG.error("tag " + info3.getID() + " already present in VCF header");
            return -1;
        }
        header.addMetaDataLine(info5);
        header.addMetaDataLine(info3);
        GenomicSequence chrom = null;
        w.writeHeader(header);
        while (in.hasNext()) {
            final VariantContext ctx = in.next();
            if (chrom == null || !chrom.getChrom().equals(ctx.getContig())) {
                chrom = new GenomicSequence(this.faidx, ctx.getContig());
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            if (ctx.getStart() > 0) {
                final StringBuilder sb = new StringBuilder(this.extend);
                for (int i = 0; i < this.extend; ++i) {
                    final int pos0 = (ctx.getStart() - 2) - i;
                    if (pos0 <= 0)
                        continue;
                    sb.insert(0, chrom.charAt(pos0));
                }
                if (sb.length() > 0)
                    vcb.attribute(info5.getID(), sb.toString());
            }
            {
                final StringBuilder sb = new StringBuilder(this.extend);
                for (int i = 0; i < this.extend; ++i) {
                    int pos0 = ctx.getEnd() + i;
                    if (pos0 >= chrom.length())
                        break;
                    sb.append(chrom.charAt(pos0));
                }
                if (sb.length() > 0)
                    vcb.attribute(info3.getID(), sb.toString());
            }
            w.add(vcb.make());
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(faidx);
    }
}
Also used : VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 40 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfBurdenSplitter method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, File outorNull) {
    SortingCollection<KeyAndLine> sortingcollection = null;
    BufferedReader in = null;
    CloseableIterator<KeyAndLine> iter = null;
    PrintStream pw = null;
    PrintWriter allDiscardedLog = null;
    try {
        in = inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName);
        if (this.allFilteredFileOut != null) {
            allDiscardedLog = IOUtils.openFileForPrintWriter(this.allFilteredFileOut);
        }
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(in);
        /**
         * find splitter by name
         */
        Splitter splitter = null;
        for (final Splitter s : this.splitters) {
            if (this.splitterName.equals(s.getName())) {
                splitter = s;
                break;
            }
        }
        if (splitter == null) {
            return wrapException("Cannot find a splitter named " + this.splitterName);
        }
        splitter.initialize(cah.header);
        LOG.info("splitter is " + splitter);
        pw = super.openFileOrStdoutAsPrintStream(outorNull);
        // read variants
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(cah.header);
        String prev_contig = null;
        for (; ; ) {
            final String line = in.readLine();
            final VariantContext variant = (line == null ? null : progess.watch(cah.codec.decode(line)));
            if (variant == null || !variant.getContig().equals(prev_contig)) {
                if (sortingcollection != null) {
                    sortingcollection.doneAdding();
                    iter = sortingcollection.iterator();
                    LOG.info("dumping data for CONTIG: \"" + prev_contig + "\"");
                    final EqualRangeIterator<KeyAndLine> eqiter = new EqualRangeIterator<>(iter, new Comparator<KeyAndLine>() {

                        @Override
                        public int compare(final KeyAndLine o1, final KeyAndLine o2) {
                            return o1.key.compareTo(o2.key);
                        }
                    });
                    while (eqiter.hasNext()) {
                        final List<KeyAndLine> buffer = eqiter.next();
                        final KeyAndLine first = buffer.get(0);
                        LOG.info(first.key);
                        final List<VariantContext> variants = new ArrayList<>(buffer.size());
                        boolean has_only_filtered = true;
                        for (final KeyAndLine kal : buffer) {
                            final VariantContext ctx = cah.codec.decode(kal.ctx);
                            variants.add(ctx);
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("Adding variant to list for key " + kal.key + " " + shortName(ctx));
                            }
                            if (!ctx.getContig().equals(prev_contig)) {
                                eqiter.close();
                                return wrapException("illegal state");
                            }
                            if (!ctx.isFiltered() || this.acceptFiltered) {
                                has_only_filtered = false;
                            // break; NOOOONNN !!!
                            }
                        }
                        // all ctx are filtered
                        if (has_only_filtered) {
                            LOG.warn("ALL IS FILTERED in " + first.key);
                            if (allDiscardedLog != null) {
                                for (final VariantContext ctx : variants) {
                                    if (isDebuggingVariant(ctx)) {
                                        LOG.info("Variant " + shortName(ctx) + " is part of never filtered for " + first.key);
                                    }
                                    allDiscardedLog.println(String.join("\t", first.key, ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().getDisplayString(), ctx.getAlternateAllele(0).getDisplayString(), String.valueOf(ctx.getFilters())));
                                }
                            }
                            continue;
                        }
                        // save vcf file
                        final VariantContextWriter out = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(pw));
                        final VCFHeader header2 = addMetaData(new VCFHeader(cah.header));
                        header2.addMetaDataLine(new VCFHeaderLine(VCF_HEADER_SPLITKEY, first.key));
                        out.writeHeader(header2);
                        for (final VariantContext ctx : variants) {
                            if (isDebuggingVariant(ctx)) {
                                LOG.info("saving variant " + shortName(ctx) + " to final output with key=" + first.key);
                            }
                            out.add(ctx);
                        }
                        // yes because wrapped into IOUtils.encloseableOutputSream
                        out.close();
                        pw.flush();
                    }
                    eqiter.close();
                    iter.close();
                    iter = null;
                    // dispose sorting collection
                    sortingcollection.cleanup();
                    sortingcollection = null;
                }
                // EOF met
                if (variant == null)
                    break;
                prev_contig = variant.getContig();
            }
            if (sortingcollection == null) {
                /* create sorting collection for new contig */
                sortingcollection = SortingCollection.newInstance(KeyAndLine.class, new KeyAndLineCodec(), new KeyAndLineComparator(), this.writingSortingCollection.maxRecordsInRam, this.writingSortingCollection.getTmpPaths());
                sortingcollection.setDestructiveIteration(true);
            }
            if (variant.getAlternateAlleles().size() != 1) {
                return wrapException("Expected only one allele per variant. Please use VcfMultiToOneAllele https://github.com/lindenb/jvarkit/wiki/VcfMultiToOneAllele.");
            }
            // no check for ctx.ifFiltered here, we do this later.
            for (final String key : splitter.keys(variant)) {
                if (isDebuggingVariant(variant)) {
                    LOG.info("Adding variant with key " + key + " " + shortName(variant));
                }
                sortingcollection.add(new KeyAndLine(key, line));
            }
        }
        progess.finish();
        pw.flush();
        pw.close();
        pw = null;
        if (allDiscardedLog != null) {
            allDiscardedLog.flush();
            allDiscardedLog.close();
            allDiscardedLog = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        return wrapException(err);
    } finally {
        CloserUtil.close(iter);
        if (sortingcollection != null)
            sortingcollection.cleanup();
        CloserUtil.close(in);
        CloserUtil.close(pw);
        CloserUtil.close(allDiscardedLog);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) PrintStream(java.io.PrintStream) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34