Search in sources :

Example 16 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class VCFComposite method scan.

private void scan(final GeneIdentifier geneKey, final List<VariantLine> variants) {
    int pair_index = 0;
    if (variants.size() < 2) {
        return;
    }
    // test if this gene is a big pool of variant, false positive. e.g: highly variables genes.
    if (max_number_of_variant_per_gene >= 0) {
        if (variants.size() > max_number_of_variant_per_gene) {
            LOG.warn("Too many variants " + variants.size() + " for " + geneKey);
            return;
        }
    }
    final Set<Sample> samples_in_gene = new HashSet<>(this.affectedSamples.size());
    // search for the one snp
    for (int x = 0; x + 1 < variants.size(); ++x) {
        final VariantContext vcx = variants.get(x).ctx;
        for (int y = x + 1; y < variants.size(); ++y) {
            final VariantContext vcy = variants.get(y).ctx;
            final Set<Sample> matching_affected_samples = new HashSet<>(this.affectedSamples.size());
            // loop over affected samples
            for (final Sample affected : this.affectedSamples) {
                final Genotype gcx = vcx.getGenotype(affected.getId());
                // child variant  n. y  must be HOM_VAR or HET
                if (gcx == null || !isGenotypeForAffected(gcx))
                    continue;
                // filtered ?
                if (!this.genotypeFilter.test(vcx, gcx))
                    continue;
                final Genotype gcy = vcy.getGenotype(affected.getId());
                // child variant n. y must be HOM_VAR or HET
                if (gcy == null || !isGenotypeForAffected(gcy))
                    continue;
                // filtered ?
                if (!this.genotypeFilter.test(vcy, gcy))
                    continue;
                boolean unaffected_are_ok = true;
                // check unaffected indididual don't have same haplotype
                for (final Sample unaffected : this.unaffectedSamples) {
                    final Genotype gux = vcx.getGenotype(unaffected.getId());
                    if (gux != null && gux.isHomVar()) {
                        unaffected_are_ok = false;
                        break;
                    }
                    final Genotype guy = vcy.getGenotype(unaffected.getId());
                    if (guy != null && guy.isHomVar()) {
                        unaffected_are_ok = false;
                        break;
                    }
                    if (gux != null && guy != null && gux.sameGenotype(gcx, true) && guy.sameGenotype(gcy, true)) {
                        unaffected_are_ok = false;
                        break;
                    }
                }
                if (!unaffected_are_ok)
                    continue;
                matching_affected_samples.add(affected);
            }
            boolean affected_are_ok;
            switch(this.pair_selection) {
                case all:
                    affected_are_ok = matching_affected_samples.size() == this.affectedSamples.size();
                    break;
                case any:
                    affected_are_ok = !matching_affected_samples.isEmpty();
                    break;
                default:
                    throw new IllegalStateException();
            }
            if (affected_are_ok) {
                samples_in_gene.addAll(matching_affected_samples);
                if (this.reportPath != null) {
                    this.reportWriter.print(geneKey.contig);
                    this.reportWriter.print('\t');
                    this.reportWriter.print(Math.min(vcx.getStart(), vcy.getStart()) - 1);
                    this.reportWriter.print('\t');
                    this.reportWriter.print(Math.max(vcx.getEnd(), vcy.getEnd()));
                    this.reportWriter.print('\t');
                    this.reportWriter.print(++pair_index);
                    this.reportWriter.print('\t');
                    this.reportWriter.print(geneKey.geneName);
                    this.reportWriter.print('\t');
                    this.reportWriter.print(geneKey.label);
                    this.reportWriter.print('\t');
                    this.reportWriter.print(geneKey.source);
                    for (int side = 0; side < 2; ++side) {
                        final VariantContext vc = (side == 0 ? vcx : vcy);
                        this.reportWriter.print('\t');
                        this.reportWriter.print(vc.getStart());
                        this.reportWriter.print('\t');
                        this.reportWriter.print(vc.getEnd());
                        this.reportWriter.print('\t');
                        this.reportWriter.print(vc.getReference().getDisplayString());
                        this.reportWriter.print('\t');
                        this.reportWriter.print(vc.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(",")));
                        this.reportWriter.print('\t');
                        this.reportWriter.print(vc.getAttributes().keySet().stream().filter(K -> K.equals(AnnPredictionParser.getDefaultTag()) || K.equals(VepPredictionParser.getDefaultTag())).map(K -> K + "=" + String.join(",", vc.getAttributeAsStringList(K, ""))).collect(Collectors.joining(";")));
                        for (final Sample sn : this.affectedSamples) {
                            this.reportWriter.print('\t');
                            this.reportWriter.print(vc.getGenotype(sn.getId()).getType().name());
                        }
                        for (final Sample sn : this.unaffectedSamples) {
                            this.reportWriter.print('\t');
                            this.reportWriter.print(vc.getGenotype(sn.getId()).getType().name());
                        }
                    }
                    this.reportWriter.println();
                }
                for (int side = 0; side < 2; ++side) {
                    final VariantContext vc = (side == 0 ? vcx : vcy);
                    final Set<String> set = getAnnotationsForVariant(vc);
                    final VariantContextBuilder vcb = new VariantContextBuilder(vc);
                    final StringBuilder sb = new StringBuilder();
                    sb.append("gene|").append(geneKey.geneName);
                    sb.append("|name|").append(geneKey.label);
                    sb.append("|source|").append(geneKey.source);
                    if (side == 1) {
                        sb.append("|pos|").append(vcx.getStart());
                        sb.append("|ref|").append(vcx.getReference().getDisplayString());
                    } else {
                        sb.append("|pos|").append(vcy.getStart());
                        sb.append("|ref|").append(vcy.getReference().getDisplayString());
                    }
                    sb.append("|samples|").append(matching_affected_samples.stream().map(S -> S.getId()).collect(Collectors.joining("&")));
                    set.add(sb.toString());
                    set.remove("");
                    vcb.attribute(INFO_TAG, new ArrayList<>(set));
                    if (side == 0) {
                        variants.get(x).ctx = vcb.make();
                    } else {
                        variants.get(y).ctx = vcb.make();
                    }
                }
            }
        }
    }
    if (!samples_in_gene.isEmpty() && this.geneReportPath != null) {
        this.reportGeneWriter.print(geneKey.contig);
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(variants.stream().mapToInt(V -> V.ctx.getStart() - 1).min().orElse(-1));
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(variants.stream().mapToInt(V -> V.ctx.getEnd()).max().orElse(-1));
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(geneKey.geneName);
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(geneKey.label);
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(geneKey.source);
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(variants.size());
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(samples_in_gene.size());
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(this.affectedSamples.size());
        this.reportGeneWriter.print('\t');
        this.reportGeneWriter.print(samples_in_gene.stream().map(S -> S.getId()).collect(Collectors.joining(",")));
        this.reportGeneWriter.println();
    }
}
Also used : WritingVariantsDelegate(com.github.lindenb.jvarkit.variant.variantcontext.writer.WritingVariantsDelegate) JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) Program(com.github.lindenb.jvarkit.util.jcommander.Program) GeneExtractorFactory(com.github.lindenb.jvarkit.util.vcf.predictions.GeneExtractorFactory) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFEncoder(htsjdk.variant.vcf.VCFEncoder) VCFHeaderVersion(htsjdk.variant.vcf.VCFHeaderVersion) DataOutputStream(java.io.DataOutputStream) StringUtil(htsjdk.samtools.util.StringUtil) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) VCFIterator(htsjdk.variant.vcf.VCFIterator) Parameter(com.beust.jcommander.Parameter) NullOuputStream(com.github.lindenb.jvarkit.io.NullOuputStream) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) ContigDictComparator(com.github.lindenb.jvarkit.util.samtools.ContigDictComparator) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) JexlGenotypePredicate(com.github.lindenb.jvarkit.util.vcf.JexlGenotypePredicate) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Comparator(java.util.Comparator) Sample(com.github.lindenb.jvarkit.pedigree.Sample) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HashSet(java.util.HashSet)

Example 17 with Sample

use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.

the class BamToMNV method doWork.

@Override
public int doWork(final List<String> args) {
    try {
        final List<Path> bams = IOUtils.unrollPaths(this.input_bams);
        if (bams.isEmpty()) {
            LOG.error("No bam was defined");
            return -1;
        }
        final Pedigree pedigree;
        if (this.pedigreePath != null) {
            pedigree = new PedigreeParser().parse(this.pedigreePath);
            pedigree.checkUniqIds();
        } else {
            pedigree = null;
        }
        try (VCFReader reader = VCFReaderFactory.makeDefault().open(oneAndOnlyOneFile(args), false)) {
            final VCFHeader header = reader.getHeader();
            final OrderChecker<VariantContext> order = new OrderChecker<>(header.getSequenceDictionary(), false);
            try (CloseableIterator<VariantContext> r = reader.iterator()) {
                this.all_variants.addAll(r.stream().filter(V -> V.isBiallelic() && V.isSNP()).map(V -> new VariantContextBuilder(V).noGenotypes().make()).map(order).collect(Collectors.toList()));
            }
        }
        final List<Mnv> all_mnvs = new ArrayList<>();
        for (int i = 0; i + 1 < this.all_variants.size(); i++) {
            final VariantContext v1 = this.all_variants.get(i);
            for (int j = i + 1; j < this.all_variants.size(); j++) {
                final VariantContext v2 = this.all_variants.get(j);
                if (!v1.withinDistanceOf(v2, min_distance_mnv))
                    break;
                if (v1.getStart() == v2.getStart())
                    continue;
                all_mnvs.add(new Mnv(i, j));
            }
        }
        final Set<String> samples = new TreeSet<>();
        final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.faidx);
        for (final Path bam : bams) {
            LOG.info(String.valueOf(bam));
            try (SamReader sr = srf.open(bam)) {
                final SAMFileHeader header = sr.getFileHeader();
                final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
                final String sample = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bam));
                if (samples.contains(sample)) {
                    LOG.error("duplicate sample " + sample);
                    return -1;
                }
                samples.add(sample);
                if (pedigree != null && pedigree.getSampleById(sample) == null) {
                    LOG.warn("sample " + sample + " from " + bam + " is not in pedigree.");
                }
                if (all_mnvs.isEmpty())
                    continue;
                final QueryInterval[] intervals = QueryInterval.optimizeIntervals(this.all_variants.stream().map(V -> new QueryInterval(dict.getSequenceIndex(V.getContig()), V.getStart(), V.getEnd())).toArray(X -> new QueryInterval[X]));
                final List<SAMRecord> sam_reads = new ArrayList<>();
                try (CloseableIterator<SAMRecord> iter = sr.query(intervals, false)) {
                    while (iter.hasNext()) {
                        final SAMRecord record = iter.next();
                        if (!SAMRecordDefaultFilter.accept(record, this.minmapq))
                            continue;
                        if (record.getReadBases() == SAMRecord.NULL_SEQUENCE)
                            continue;
                        sam_reads.add(record);
                    }
                }
                // sort on query name
                Collections.sort(sam_reads, (A, B) -> A.getReadName().compareTo(B.getReadName()));
                for (final Mnv mnv : all_mnvs) {
                    final Phase phase = mnv.getPhase(sam_reads);
                    if (phase.equals(Phase.ambigous))
                        continue;
                    mnv.sample2phase.put(sample, phase);
                }
            }
        }
        try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
            pw.print("#CHROM1\tPOS1\tREF1\tALT1");
            pw.print("\tCHROM2\tPOS2\tREF2\tALT2");
            pw.print("\tdistance");
            for (final String sn : samples) pw.print("\t" + sn);
            if (pedigree != null) {
                pw.print("\tcase-cis\tcase-trans\tcontrol-cis\tcontrol-trans\tfisher");
            }
            pw.println();
            for (final Mnv mnv : all_mnvs) {
                if (mnv.sample2phase.values().stream().allMatch(V -> V.equals(Phase.ambigous) || V.equals(Phase.ref)))
                    continue;
                for (int side = 0; side < 2; ++side) {
                    final VariantContext ctx = mnv.get(side);
                    if (side > 0)
                        pw.print("\t");
                    pw.print(ctx.getContig());
                    pw.print("\t");
                    pw.print(ctx.getStart());
                    pw.print("\t");
                    pw.print(ctx.getReference().getDisplayString());
                    pw.print("\t");
                    pw.print(ctx.getAlleles().get(1).getDisplayString());
                }
                pw.print("\t");
                pw.print(CoordMath.getLength(mnv.get(0).getStart(), mnv.get(1).getEnd()));
                int case_cis = 0;
                int case_trans = 0;
                int ctrl_cis = 0;
                int ctrl_trans = 0;
                for (final String sn : samples) {
                    pw.print("\t");
                    final Phase phase = mnv.sample2phase.get(sn);
                    if (phase == null) {
                        pw.print(".");
                        continue;
                    }
                    pw.print(phase.name());
                    if (pedigree != null) {
                        final Sample sample = pedigree.getSampleById(sn);
                        if (sample == null) {
                        // nothing
                        } else if (sample.isAffected()) {
                            if (phase.equals(Phase.cis)) {
                                case_cis++;
                            } else if (phase.equals(Phase.trans)) {
                                case_trans++;
                            }
                        } else if (sample.isUnaffected()) {
                            if (phase.equals(Phase.cis)) {
                                ctrl_cis++;
                            } else if (phase.equals(Phase.trans)) {
                                ctrl_trans++;
                            }
                        }
                    }
                }
                if (pedigree != null) {
                    pw.print("\t");
                    pw.print(case_cis);
                    pw.print("\t");
                    pw.print(case_trans);
                    pw.print("\t");
                    pw.print(ctrl_cis);
                    pw.print("\t");
                    pw.print(ctrl_trans);
                    pw.print("\t");
                    final FisherExactTest fisher = FisherExactTest.compute(case_cis, case_trans, ctrl_cis, ctrl_trans);
                    pw.print(fisher.getAsDouble());
                }
                pw.println();
            }
            pw.flush();
        }
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    }
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashMap(java.util.HashMap) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) AlignmentBlock(htsjdk.samtools.AlignmentBlock) DistanceParser(com.github.lindenb.jvarkit.util.bio.DistanceParser) NoSplitter(com.github.lindenb.jvarkit.util.jcommander.NoSplitter) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) SAMRecordDefaultFilter(com.github.lindenb.jvarkit.samtools.SAMRecordDefaultFilter) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFReaderFactory(com.github.lindenb.jvarkit.variant.vcf.VCFReaderFactory) Path(java.nio.file.Path) PrintWriter(java.io.PrintWriter) SequenceDictionaryUtils(com.github.lindenb.jvarkit.util.bio.SequenceDictionaryUtils) EqualIterator(com.github.lindenb.jvarkit.iterator.EqualIterator) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFReader(htsjdk.variant.vcf.VCFReader) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) StringUtils(com.github.lindenb.jvarkit.lang.StringUtils) QueryInterval(htsjdk.samtools.QueryInterval) OrderChecker(com.github.lindenb.jvarkit.dict.OrderChecker) CoordMath(htsjdk.samtools.util.CoordMath) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) Collections(java.util.Collections) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Sample(com.github.lindenb.jvarkit.pedigree.Sample) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) PedigreeParser(com.github.lindenb.jvarkit.pedigree.PedigreeParser) SamReader(htsjdk.samtools.SamReader) VCFReader(htsjdk.variant.vcf.VCFReader) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) PrintWriter(java.io.PrintWriter) Path(java.nio.file.Path) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Sample(com.github.lindenb.jvarkit.pedigree.Sample) FisherExactTest(com.github.lindenb.jvarkit.math.stats.FisherExactTest) OrderChecker(com.github.lindenb.jvarkit.dict.OrderChecker) Pedigree(com.github.lindenb.jvarkit.pedigree.Pedigree) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

Sample (com.github.lindenb.jvarkit.pedigree.Sample)17 PedigreeParser (com.github.lindenb.jvarkit.pedigree.PedigreeParser)14 Parameter (com.beust.jcommander.Parameter)13 Pedigree (com.github.lindenb.jvarkit.pedigree.Pedigree)13 Program (com.github.lindenb.jvarkit.util.jcommander.Program)13 Logger (com.github.lindenb.jvarkit.util.log.Logger)13 Genotype (htsjdk.variant.variantcontext.Genotype)13 VariantContext (htsjdk.variant.variantcontext.VariantContext)13 Path (java.nio.file.Path)13 List (java.util.List)13 VCFHeader (htsjdk.variant.vcf.VCFHeader)12 ArrayList (java.util.ArrayList)12 Collectors (java.util.stream.Collectors)12 Set (java.util.Set)11 StringUtils (com.github.lindenb.jvarkit.lang.StringUtils)10 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)10 IOException (java.io.IOException)10 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)9 PrintWriter (java.io.PrintWriter)9 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)8