Search in sources :

Example 6 with Pedigree

use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.

the class VcfSkatSlidingWindow method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.nJobs < 1) {
        this.nJobs = Math.max(1, Runtime.getRuntime().availableProcessors());
        LOG.info("setting njobs to " + this.nJobs);
    }
    VcfIterator r = null;
    try {
        final VCFHeader header;
        final SAMSequenceDictionary dict;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        try (final VCFFileReader vr = new VCFFileReader(vcfFile, true)) {
            header = vr.getFileHeader();
            dict = header.getSequenceDictionary();
        }
        if (dict == null || dict.isEmpty()) {
            throw new JvarkitException.VcfDictionaryMissing(vcfFile);
        }
        if (!this.limit_contigs.isEmpty()) {
            if (this.limit_contigs.stream().anyMatch(C -> dict.getSequence(C) == null)) {
                LOG.error("user contig missing in vcf dictionary.");
                return -1;
            }
        }
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        final Set<Pedigree.Person> samples = new HashSet<>(pedigree.getPersons());
        samples.removeIf(I -> !(I.isAffected() || I.isUnaffected()) || !header.getSampleNamesInOrder().contains(I.getId()));
        this.writer = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        final Consumer<SkatCallerResult> writeResult = (R) -> {
            synchronized (this.writer) {
                this.writer.println(R.toString());
            }
        };
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            if (!this.limit_contigs.isEmpty() && !this.limit_contigs.contains(ssr.getSequenceName())) {
                LOG.warning("skipping contig " + ssr.getSequenceName());
                continue;
            }
            LOG.info("contig " + ssr.getSequenceName());
            final ExecutorService executorService = new ThreadPoolExecutor(this.nJobs, this.nJobs, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>(this.nJobs));
            final List<Future<Integer>> results = new ArrayList<>(this.nJobs);
            for (int i = 0; i < this.nJobs; i++) {
                final int windowLen = Math.max(1, ssr.getSequenceLength() / this.nJobs);
                final SkatWorker caller = new SkatWorker(vcfFile, new Interval(ssr.getSequenceName(), i * windowLen, Math.min(ssr.getSequenceLength(), (i + 1) * windowLen)), samples, this.skat.build(), writeResult);
                results.add(executorService.submit(caller));
            }
            executorService.shutdown();
            executorService.awaitTermination(365, TimeUnit.DAYS);
            for (final Future<Integer> fl : results) {
                try {
                    if (fl.get() != 0) {
                        LOG.error("An error occured");
                        return -1;
                    }
                } catch (final Exception err) {
                    LOG.error(err);
                    return -1;
                }
            }
        }
        this.writer.flush();
        this.writer.close();
        this.writer = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(this.writer);
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Callable(java.util.concurrent.Callable) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Future(java.util.concurrent.Future) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ExecutorService(java.util.concurrent.ExecutorService) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) List(java.util.List) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 7 with Pedigree

use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.

the class GroupByGene method read.

private void read(final String input) throws IOException {
    LineIterator lineiter = null;
    SortingCollection<Call> sortingCollection = null;
    try {
        final Pattern regexType = (StringUtil.isBlank(this.typeRegexExclude) ? null : Pattern.compile(this.typeRegexExclude));
        lineiter = (input == null ? IOUtils.openStreamForLineIterator(stdin()) : IOUtils.openURIForLineIterator(input));
        sortingCollection = SortingCollection.newInstance(Call.class, new CallCodec(), (C1, C2) -> {
            int i = C1.compareTo(C2);
            if (i != 0)
                return i;
            return C1.line.compareTo(C2.line);
        }, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sortingCollection.setDestructiveIteration(true);
        final VCFUtils.CodecAndHeader cah = VCFUtils.parseHeader(lineiter);
        final VCFHeader header = cah.header;
        this.the_dictionary = header.getSequenceDictionary();
        if (this.the_dictionary == null || this.the_dictionary.isEmpty()) {
            throw new JvarkitException.DictionaryMissing(input);
        }
        this.the_codec = cah.codec;
        final List<String> sampleNames;
        if (header.getSampleNamesInOrder() != null) {
            sampleNames = header.getSampleNamesInOrder();
        } else {
            sampleNames = Collections.emptyList();
        }
        final VcfTools vcfTools = new VcfTools(header);
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(header);
        }
        final Pattern tab = Pattern.compile("[\t]");
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(the_dictionary).logger(LOG);
        while (lineiter.hasNext()) {
            String line = lineiter.next();
            final VariantContext ctx = progress.watch(this.the_codec.decode(line));
            if (!ctx.isVariant())
                continue;
            if (ignore_filtered && ctx.isFiltered())
                continue;
            // simplify line
            final String[] tokens = tab.split(line);
            // ID
            tokens[2] = VCFConstants.EMPTY_ID_FIELD;
            // QUAL
            tokens[5] = VCFConstants.MISSING_VALUE_v4;
            // FILTER
            tokens[6] = VCFConstants.UNFILTERED;
            // INFO
            tokens[7] = VCFConstants.EMPTY_INFO_FIELD;
            line = String.join(VCFConstants.FIELD_SEPARATOR, Arrays.asList(tokens));
            for (final GeneName g : getGenes(vcfTools, ctx)) {
                if (regexType != null && regexType.matcher(g.type).matches())
                    continue;
                final Call c = new Call();
                c.line = line;
                c.gene = g;
                sortingCollection.add(c);
            }
        }
        CloserUtil.close(lineiter);
        lineiter = null;
        sortingCollection.doneAdding();
        /**
         * dump
         */
        final Set<String> casesSamples = pedigree.getPersons().stream().filter(P -> P.isAffected()).map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
        final Set<String> controlsSamples = pedigree.getPersons().stream().filter(P -> P.isUnaffected()).map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
        final Set<String> maleSamples = pedigree.getPersons().stream().filter(P -> P.isMale()).map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
        final Set<String> femaleSamples = pedigree.getPersons().stream().filter(P -> P.isFemale()).map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
        final Predicate<Genotype> genotypeFilter = genotype -> {
            if (!genotype.isAvailable())
                return false;
            if (!genotype.isCalled())
                return false;
            if (genotype.isNoCall())
                return false;
            if (genotype.isHomRef())
                return false;
            if (this.ignore_filtered_genotype && genotype.isFiltered())
                return false;
            return true;
        };
        PrintStream pw = openFileOrStdoutAsPrintStream(this.outFile);
        pw.print("#chrom");
        pw.print('\t');
        pw.print("min.POS");
        pw.print('\t');
        pw.print("max.POS");
        pw.print('\t');
        pw.print("gene.name");
        pw.print('\t');
        pw.print("gene.type");
        pw.print('\t');
        pw.print("samples.affected");
        pw.print('\t');
        pw.print("count.variations");
        if (!casesSamples.isEmpty()) {
            pw.print('\t');
            pw.print("pedigree.cases");
        }
        if (!controlsSamples.isEmpty()) {
            pw.print('\t');
            pw.print("pedigree.controls");
        }
        if (!maleSamples.isEmpty()) {
            pw.print('\t');
            pw.print("pedigree.males");
        }
        if (!femaleSamples.isEmpty()) {
            pw.print('\t');
            pw.print("pedigree.females");
        }
        if (this.print_fisher && !controlsSamples.isEmpty() && !casesSamples.isEmpty()) {
            pw.print('\t');
            pw.print("fisher");
        }
        for (final String sample : sampleNames) {
            pw.print('\t');
            pw.print(sample);
        }
        pw.println();
        final CloseableIterator<Call> iter = sortingCollection.iterator();
        final EqualRangeIterator<Call> eqiter = new EqualRangeIterator<>(iter, (C1, C2) -> C1.compareTo(C2));
        while (eqiter.hasNext()) {
            final List<Call> row = eqiter.next();
            final Call first = row.get(0);
            final List<VariantContext> variantList = row.stream().map(R -> GroupByGene.this.the_codec.decode(R.line)).collect(Collectors.toList());
            final int minPos = variantList.stream().mapToInt(R -> R.getStart()).min().getAsInt();
            final int maxPos = variantList.stream().mapToInt(R -> R.getEnd()).max().getAsInt();
            final Set<String> sampleCarryingMut = new HashSet<String>();
            final Counter<String> pedCasesCarryingMut = new Counter<String>();
            final Counter<String> pedCtrlsCarryingMut = new Counter<String>();
            final Counter<String> malesCarryingMut = new Counter<String>();
            final Counter<String> femalesCarryingMut = new Counter<String>();
            final Counter<String> sample2count = new Counter<String>();
            for (final VariantContext ctx : variantList) {
                for (final Genotype genotype : ctx.getGenotypes()) {
                    if (!genotypeFilter.test(genotype))
                        continue;
                    final String sampleName = genotype.getSampleName();
                    sample2count.incr(sampleName);
                    sampleCarryingMut.add(sampleName);
                    if (casesSamples.contains(sampleName)) {
                        pedCasesCarryingMut.incr(sampleName);
                    }
                    if (controlsSamples.contains(sampleName)) {
                        pedCtrlsCarryingMut.incr(sampleName);
                    }
                    if (maleSamples.contains(sampleName)) {
                        malesCarryingMut.incr(sampleName);
                    }
                    if (femaleSamples.contains(sampleName)) {
                        femalesCarryingMut.incr(sampleName);
                    }
                }
            }
            pw.print(first.getContig());
            pw.print('\t');
            // convert to bed
            pw.print(minPos - 1);
            pw.print('\t');
            pw.print(maxPos);
            pw.print('\t');
            pw.print(first.gene.name);
            pw.print('\t');
            pw.print(first.gene.type);
            pw.print('\t');
            pw.print(sampleCarryingMut.size());
            pw.print('\t');
            pw.print(variantList.size());
            if (!casesSamples.isEmpty()) {
                pw.print('\t');
                pw.print(pedCasesCarryingMut.getCountCategories());
            }
            if (!controlsSamples.isEmpty()) {
                pw.print('\t');
                pw.print(pedCtrlsCarryingMut.getCountCategories());
            }
            if (!maleSamples.isEmpty()) {
                pw.print('\t');
                pw.print(malesCarryingMut.getCountCategories());
            }
            if (!femaleSamples.isEmpty()) {
                pw.print('\t');
                pw.print(femalesCarryingMut.getCountCategories());
            }
            if (this.print_fisher && !controlsSamples.isEmpty() && !casesSamples.isEmpty()) {
                int count_case_mut = 0;
                int count_ctrl_mut = 0;
                int count_case_wild = 0;
                int count_ctrl_wild = 0;
                for (final VariantContext ctx : variantList) {
                    for (final Genotype genotype : ctx.getGenotypes()) {
                        final String sampleName = genotype.getSampleName();
                        final boolean has_mutation = genotypeFilter.test(genotype);
                        if (controlsSamples.contains(sampleName)) {
                            if (has_mutation) {
                                count_ctrl_mut++;
                            } else {
                                count_ctrl_wild++;
                            }
                        } else if (casesSamples.contains(sampleName)) {
                            if (has_mutation) {
                                count_case_mut++;
                            } else {
                                count_case_wild++;
                            }
                        }
                    }
                }
                final FisherExactTest fisher = FisherExactTest.compute(count_case_mut, count_case_wild, count_ctrl_mut, count_ctrl_wild);
                pw.print('\t');
                pw.print(fisher.getAsDouble());
            }
            for (final String sample : sampleNames) {
                pw.print('\t');
                pw.print(sample2count.count(sample));
            }
            pw.println();
            if (pw.checkError())
                break;
        }
        eqiter.close();
        iter.close();
        pw.flush();
        if (this.outFile != null)
            pw.close();
    } finally {
        CloserUtil.close(lineiter);
        if (sortingCollection != null)
            sortingCollection.cleanup();
    }
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Arrays(java.util.Arrays) LineIterator(htsjdk.tribble.readers.LineIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) HashSet(java.util.HashSet) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VCFConstants(htsjdk.variant.vcf.VCFConstants) CloserUtil(htsjdk.samtools.util.CloserUtil) SnpEffPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.SnpEffPredictionParser) PrintStream(java.io.PrintStream) SortingCollection(htsjdk.samtools.util.SortingCollection) Counter(com.github.lindenb.jvarkit.util.Counter) AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) Predicate(java.util.function.Predicate) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Collections(java.util.Collections) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) LineIterator(htsjdk.tribble.readers.LineIterator) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Counter(com.github.lindenb.jvarkit.util.Counter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) Pattern(java.util.regex.Pattern) PrintStream(java.io.PrintStream) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pedigree(com.github.lindenb.jvarkit.util.Pedigree)

Example 8 with Pedigree

use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.

the class VcfToRdf method writeHeader.

private void writeHeader(final VCFHeader header, final URI source) {
    if (source != null) {
        emit(source, "rdf:type", "vcf:File");
    }
    final SAMSequenceDictionary dict = header.getSequenceDictionary();
    if (dict != null) {
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            emit(URI.create("urn:chrom/" + ssr.getSequenceName()), "rdf:type", "vcf:Chromosome", "dc:title", ssr.getSequenceName(), "vcf:length", ssr.getSequenceLength(), "vcf:sequenceIndex", ssr.getSequenceIndex());
        }
    }
    for (final VCFFilterHeaderLine h : header.getFilterLines()) {
        emit(URI.create("urn:filter/" + h.getID()), "rdf:type", "vcf:Filter", "dc:title", h.getID(), "dc:description", h.getValue());
    }
    final Pedigree pedigree = Pedigree.newParser().parse(header);
    for (final Pedigree.Person pe : pedigree.getPersons()) {
        final URI sample = URI.create("urn:sample/" + pe.getId());
        emit(sample, "rdf:type", "foaf:Person", "foaf:name", pe.getId(), "foaf:family", pe.getFamily().getId());
        if (pe.isMale())
            emit(sample, "idt:gender", "male");
        if (pe.isFemale())
            emit(sample, "idt:gender", "female");
        if (pe.isAffected())
            emit(sample, "idt:status", "affected");
        if (pe.isUnaffected())
            emit(sample, "idt:status", "unaffected");
    }
    // Sample
    for (final String sample : header.getSampleNamesInOrder()) {
        emit(URI.create("urn:sample/" + sample), "rdf:type", "vcf:Sample", "dc:title", sample);
    }
}
Also used : Pedigree(com.github.lindenb.jvarkit.util.Pedigree) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) URI(java.net.URI)

Example 9 with Pedigree

use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.

the class VcfOptimizePedForSkat method doWork.

@Override
public int doWork(final List<String> args) {
    VcfIterator r = null;
    try {
        this.random = new Random(this.seed == -1 ? System.currentTimeMillis() : seed);
        if (this.nSamplesRemove < 1) {
            LOG.error("bad number of samples to remove.");
            return -1;
        }
        final SkatFactory.SkatExecutor executor = this.skatInstance.build();
        r = super.openVcfIterator(oneFileOrNull(args));
        final List<VariantContext> variants = new ArrayList<>();
        while (r.hasNext()) {
            final VariantContext ctx = r.next();
            if (!executor.getUpstreamVariantFilter().test(ctx))
                continue;
            variants.add(ctx);
        }
        LOG.info("number of variants : " + variants.size());
        if (variants.stream().map(V -> V.getContig()).collect(Collectors.toSet()).size() != 1) {
            LOG.error("multiple contig/chromosome in input.");
            return -1;
        }
        final VCFHeader header = r.getHeader();
        final Set<String> sampleNames = new HashSet<>(header.getSampleNamesInOrder());
        if (this.pedigreeFile != null) {
            this.pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            this.pedigree = new Pedigree.Parser().parse(header);
        }
        r.close();
        r = null;
        final List<Pedigree.Person> samples = this.pedigree.getPersons().stream().filter(P -> (P.isAffected() || P.isUnaffected())).filter(S -> sampleNames.contains(S.getId())).collect(Collectors.toList());
        if (samples.isEmpty()) {
            LOG.error("Not enough Samples in pedigree/vcf");
            return -1;
        }
        if (!samples.stream().anyMatch(P -> P.isAffected())) {
            LOG.error("No affected Samples in pedigree/vcf");
            return -1;
        }
        if (!samples.stream().anyMatch(P -> P.isUnaffected())) {
            LOG.error("No unaffected Samples in pedigree/vcf");
            return -1;
        }
        long nIter = 0L;
        while (max_iterations == -1L || nIter < max_iterations) {
            exec(nIter, header, variants, samples, executor);
            ++nIter;
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Random(java.util.Random) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) VcfBurdenFisherH(com.github.lindenb.jvarkit.tools.burden.VcfBurdenFisherH) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) VcfBurdenMAF(com.github.lindenb.jvarkit.tools.burden.VcfBurdenMAF) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Random(java.util.Random) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet)

Example 10 with Pedigree

use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.

the class VCFFilterJS method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, final VariantContextWriter w) {
    try {
        final VCFHeader header = r.getHeader();
        final VcfTools vcfTools = new VcfTools(header);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        final Pedigree pedigree;
        if (pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else // try to read from VCF header
        {
            Pedigree p = null;
            try {
                p = Pedigree.newParser().parse(header);
            } catch (final Exception err) {
                LOG.warn("cannot decode pedigree from vcf header");
                p = Pedigree.createEmptyPedigree();
            }
            pedigree = p;
        }
        final VCFFilterHeaderLine filterHeaderLine = (filteredTag.trim().isEmpty() ? null : new VCFFilterHeaderLine(this.filteredTag.trim(), "Filtered with " + getProgramName()));
        if (filterHeaderLine != null)
            h2.addMetaDataLine(filterHeaderLine);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        final Bindings bindings = this.compiledScript.getEngine().createBindings();
        bindings.put("header", header);
        bindings.put("tools", vcfTools);
        bindings.put("pedigree", pedigree);
        bindings.put("individuals", Collections.unmodifiableList(pedigree.getPersons().stream().filter(P -> (P.isAffected() || P.isUnaffected())).filter(P -> P.hasUniqId()).filter(P -> header.getSampleNamesInOrder().contains(P.getId())).collect(Collectors.toList())));
        for (final String jsonkv : this.jsonFiles) {
            int eq = jsonkv.indexOf("=");
            if (eq <= 0)
                throw new JvarkitException.UserError("Bad format for json . expected key=/path/to/file.json but got '" + jsonkv + "'");
            final String key = jsonkv.substring(0, eq);
            final FileReader jsonFile = new FileReader(jsonkv.substring(eq + 1));
            JsonParser jsonParser = new JsonParser();
            final JsonElement root = jsonParser.parse(jsonFile);
            jsonFile.close();
            bindings.put(key, root);
        }
        w.writeHeader(h2);
        while (r.hasNext() && !w.checkError()) {
            final VariantContext variation = progress.watch(r.next());
            bindings.put("variant", variation);
            final Object result = compiledScript.eval(bindings);
            // result is an array of a collection of variants
            if (result != null && (result.getClass().isArray() || (result instanceof Collection))) {
                final Collection<?> col;
                if (result.getClass().isArray()) {
                    final Object[] array = (Object[]) result;
                    col = Arrays.asList(array);
                } else {
                    col = (Collection<?>) result;
                }
                // write all of variants
                for (final Object item : col) {
                    if (item == null)
                        throw new JvarkitException.UserError("item in array is null");
                    if (!(item instanceof VariantContext))
                        throw new JvarkitException.UserError("item in array is not a VariantContext " + item.getClass());
                    w.add(VariantContext.class.cast(item));
                }
            } else // result is a VariantContext
            if (result != null && (result instanceof VariantContext)) {
                w.add(VariantContext.class.cast(result));
            } else {
                boolean accept = true;
                if (result == null) {
                    accept = false;
                } else if (result instanceof Boolean) {
                    if (Boolean.FALSE.equals(result))
                        accept = false;
                } else if (result instanceof Number) {
                    if (((Number) result).intValue() != 1)
                        accept = false;
                } else {
                    LOG.warn("Script returned something that is not a boolean or a number:" + result.getClass());
                    accept = false;
                }
                if (!accept) {
                    if (filterHeaderLine != null) {
                        final VariantContextBuilder vcb = new VariantContextBuilder(variation);
                        vcb.filter(filterHeaderLine.getID());
                        w.add(vcb.make());
                    }
                    continue;
                }
                // set PASS filter if needed
                if (filterHeaderLine != null && !variation.isFiltered()) {
                    w.add(new VariantContextBuilder(variation).passFilters().make());
                    continue;
                }
                w.add(variation);
            }
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
    }
}
Also used : Arrays(java.util.Arrays) Bindings(javax.script.Bindings) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Term(com.github.lindenb.semontology.Term) JsonParser(com.google.gson.JsonParser) ArrayList(java.util.ArrayList) JsonElement(com.google.gson.JsonElement) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Collection(java.util.Collection) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) CompiledScript(javax.script.CompiledScript) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) FileReader(java.io.FileReader) Collections(java.util.Collections) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) Bindings(javax.script.Bindings) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) JsonElement(com.google.gson.JsonElement) Collection(java.util.Collection) FileReader(java.io.FileReader) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) JsonParser(com.google.gson.JsonParser)

Aggregations

Pedigree (com.github.lindenb.jvarkit.util.Pedigree)13 VariantContext (htsjdk.variant.variantcontext.VariantContext)12 VCFHeader (htsjdk.variant.vcf.VCFHeader)11 Parameter (com.beust.jcommander.Parameter)10 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)10 Logger (com.github.lindenb.jvarkit.util.log.Logger)10 File (java.io.File)10 List (java.util.List)10 Program (com.github.lindenb.jvarkit.util.jcommander.Program)9 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)9 ArrayList (java.util.ArrayList)9 Collectors (java.util.stream.Collectors)9 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)8 CloserUtil (htsjdk.samtools.util.CloserUtil)8 Set (java.util.Set)8 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)7 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)7 ParametersDelegate (com.beust.jcommander.ParametersDelegate)6 StringUtil (htsjdk.samtools.util.StringUtil)6 Genotype (htsjdk.variant.variantcontext.Genotype)6