use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.
the class VCFComposite method scan.
private void scan(final GeneIdentifier geneKey, final List<VariantLine> variants) {
int pair_index = 0;
if (variants.size() < 2) {
return;
}
// test if this gene is a big pool of variant, false positive. e.g: highly variables genes.
if (max_number_of_variant_per_gene >= 0) {
if (variants.size() > max_number_of_variant_per_gene) {
LOG.warn("Too many variants " + variants.size() + " for " + geneKey);
return;
}
}
final Set<Sample> samples_in_gene = new HashSet<>(this.affectedSamples.size());
// search for the one snp
for (int x = 0; x + 1 < variants.size(); ++x) {
final VariantContext vcx = variants.get(x).ctx;
for (int y = x + 1; y < variants.size(); ++y) {
final VariantContext vcy = variants.get(y).ctx;
final Set<Sample> matching_affected_samples = new HashSet<>(this.affectedSamples.size());
// loop over affected samples
for (final Sample affected : this.affectedSamples) {
final Genotype gcx = vcx.getGenotype(affected.getId());
// child variant n. y must be HOM_VAR or HET
if (gcx == null || !isGenotypeForAffected(gcx))
continue;
// filtered ?
if (!this.genotypeFilter.test(vcx, gcx))
continue;
final Genotype gcy = vcy.getGenotype(affected.getId());
// child variant n. y must be HOM_VAR or HET
if (gcy == null || !isGenotypeForAffected(gcy))
continue;
// filtered ?
if (!this.genotypeFilter.test(vcy, gcy))
continue;
boolean unaffected_are_ok = true;
// check unaffected indididual don't have same haplotype
for (final Sample unaffected : this.unaffectedSamples) {
final Genotype gux = vcx.getGenotype(unaffected.getId());
if (gux != null && gux.isHomVar()) {
unaffected_are_ok = false;
break;
}
final Genotype guy = vcy.getGenotype(unaffected.getId());
if (guy != null && guy.isHomVar()) {
unaffected_are_ok = false;
break;
}
if (gux != null && guy != null && gux.sameGenotype(gcx, true) && guy.sameGenotype(gcy, true)) {
unaffected_are_ok = false;
break;
}
}
if (!unaffected_are_ok)
continue;
matching_affected_samples.add(affected);
}
boolean affected_are_ok;
switch(this.pair_selection) {
case all:
affected_are_ok = matching_affected_samples.size() == this.affectedSamples.size();
break;
case any:
affected_are_ok = !matching_affected_samples.isEmpty();
break;
default:
throw new IllegalStateException();
}
if (affected_are_ok) {
samples_in_gene.addAll(matching_affected_samples);
if (this.reportPath != null) {
this.reportWriter.print(geneKey.contig);
this.reportWriter.print('\t');
this.reportWriter.print(Math.min(vcx.getStart(), vcy.getStart()) - 1);
this.reportWriter.print('\t');
this.reportWriter.print(Math.max(vcx.getEnd(), vcy.getEnd()));
this.reportWriter.print('\t');
this.reportWriter.print(++pair_index);
this.reportWriter.print('\t');
this.reportWriter.print(geneKey.geneName);
this.reportWriter.print('\t');
this.reportWriter.print(geneKey.label);
this.reportWriter.print('\t');
this.reportWriter.print(geneKey.source);
for (int side = 0; side < 2; ++side) {
final VariantContext vc = (side == 0 ? vcx : vcy);
this.reportWriter.print('\t');
this.reportWriter.print(vc.getStart());
this.reportWriter.print('\t');
this.reportWriter.print(vc.getEnd());
this.reportWriter.print('\t');
this.reportWriter.print(vc.getReference().getDisplayString());
this.reportWriter.print('\t');
this.reportWriter.print(vc.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(",")));
this.reportWriter.print('\t');
this.reportWriter.print(vc.getAttributes().keySet().stream().filter(K -> K.equals(AnnPredictionParser.getDefaultTag()) || K.equals(VepPredictionParser.getDefaultTag())).map(K -> K + "=" + String.join(",", vc.getAttributeAsStringList(K, ""))).collect(Collectors.joining(";")));
for (final Sample sn : this.affectedSamples) {
this.reportWriter.print('\t');
this.reportWriter.print(vc.getGenotype(sn.getId()).getType().name());
}
for (final Sample sn : this.unaffectedSamples) {
this.reportWriter.print('\t');
this.reportWriter.print(vc.getGenotype(sn.getId()).getType().name());
}
}
this.reportWriter.println();
}
for (int side = 0; side < 2; ++side) {
final VariantContext vc = (side == 0 ? vcx : vcy);
final Set<String> set = getAnnotationsForVariant(vc);
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
final StringBuilder sb = new StringBuilder();
sb.append("gene|").append(geneKey.geneName);
sb.append("|name|").append(geneKey.label);
sb.append("|source|").append(geneKey.source);
if (side == 1) {
sb.append("|pos|").append(vcx.getStart());
sb.append("|ref|").append(vcx.getReference().getDisplayString());
} else {
sb.append("|pos|").append(vcy.getStart());
sb.append("|ref|").append(vcy.getReference().getDisplayString());
}
sb.append("|samples|").append(matching_affected_samples.stream().map(S -> S.getId()).collect(Collectors.joining("&")));
set.add(sb.toString());
set.remove("");
vcb.attribute(INFO_TAG, new ArrayList<>(set));
if (side == 0) {
variants.get(x).ctx = vcb.make();
} else {
variants.get(y).ctx = vcb.make();
}
}
}
}
}
if (!samples_in_gene.isEmpty() && this.geneReportPath != null) {
this.reportGeneWriter.print(geneKey.contig);
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(variants.stream().mapToInt(V -> V.ctx.getStart() - 1).min().orElse(-1));
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(variants.stream().mapToInt(V -> V.ctx.getEnd()).max().orElse(-1));
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(geneKey.geneName);
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(geneKey.label);
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(geneKey.source);
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(variants.size());
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(samples_in_gene.size());
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(this.affectedSamples.size());
this.reportGeneWriter.print('\t');
this.reportGeneWriter.print(samples_in_gene.stream().map(S -> S.getId()).collect(Collectors.joining(",")));
this.reportGeneWriter.println();
}
}
use of com.github.lindenb.jvarkit.pedigree.Sample in project jvarkit by lindenb.
the class BamToMNV method doWork.
@Override
public int doWork(final List<String> args) {
try {
final List<Path> bams = IOUtils.unrollPaths(this.input_bams);
if (bams.isEmpty()) {
LOG.error("No bam was defined");
return -1;
}
final Pedigree pedigree;
if (this.pedigreePath != null) {
pedigree = new PedigreeParser().parse(this.pedigreePath);
pedigree.checkUniqIds();
} else {
pedigree = null;
}
try (VCFReader reader = VCFReaderFactory.makeDefault().open(oneAndOnlyOneFile(args), false)) {
final VCFHeader header = reader.getHeader();
final OrderChecker<VariantContext> order = new OrderChecker<>(header.getSequenceDictionary(), false);
try (CloseableIterator<VariantContext> r = reader.iterator()) {
this.all_variants.addAll(r.stream().filter(V -> V.isBiallelic() && V.isSNP()).map(V -> new VariantContextBuilder(V).noGenotypes().make()).map(order).collect(Collectors.toList()));
}
}
final List<Mnv> all_mnvs = new ArrayList<>();
for (int i = 0; i + 1 < this.all_variants.size(); i++) {
final VariantContext v1 = this.all_variants.get(i);
for (int j = i + 1; j < this.all_variants.size(); j++) {
final VariantContext v2 = this.all_variants.get(j);
if (!v1.withinDistanceOf(v2, min_distance_mnv))
break;
if (v1.getStart() == v2.getStart())
continue;
all_mnvs.add(new Mnv(i, j));
}
}
final Set<String> samples = new TreeSet<>();
final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.faidx);
for (final Path bam : bams) {
LOG.info(String.valueOf(bam));
try (SamReader sr = srf.open(bam)) {
final SAMFileHeader header = sr.getFileHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final String sample = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bam));
if (samples.contains(sample)) {
LOG.error("duplicate sample " + sample);
return -1;
}
samples.add(sample);
if (pedigree != null && pedigree.getSampleById(sample) == null) {
LOG.warn("sample " + sample + " from " + bam + " is not in pedigree.");
}
if (all_mnvs.isEmpty())
continue;
final QueryInterval[] intervals = QueryInterval.optimizeIntervals(this.all_variants.stream().map(V -> new QueryInterval(dict.getSequenceIndex(V.getContig()), V.getStart(), V.getEnd())).toArray(X -> new QueryInterval[X]));
final List<SAMRecord> sam_reads = new ArrayList<>();
try (CloseableIterator<SAMRecord> iter = sr.query(intervals, false)) {
while (iter.hasNext()) {
final SAMRecord record = iter.next();
if (!SAMRecordDefaultFilter.accept(record, this.minmapq))
continue;
if (record.getReadBases() == SAMRecord.NULL_SEQUENCE)
continue;
sam_reads.add(record);
}
}
// sort on query name
Collections.sort(sam_reads, (A, B) -> A.getReadName().compareTo(B.getReadName()));
for (final Mnv mnv : all_mnvs) {
final Phase phase = mnv.getPhase(sam_reads);
if (phase.equals(Phase.ambigous))
continue;
mnv.sample2phase.put(sample, phase);
}
}
}
try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
pw.print("#CHROM1\tPOS1\tREF1\tALT1");
pw.print("\tCHROM2\tPOS2\tREF2\tALT2");
pw.print("\tdistance");
for (final String sn : samples) pw.print("\t" + sn);
if (pedigree != null) {
pw.print("\tcase-cis\tcase-trans\tcontrol-cis\tcontrol-trans\tfisher");
}
pw.println();
for (final Mnv mnv : all_mnvs) {
if (mnv.sample2phase.values().stream().allMatch(V -> V.equals(Phase.ambigous) || V.equals(Phase.ref)))
continue;
for (int side = 0; side < 2; ++side) {
final VariantContext ctx = mnv.get(side);
if (side > 0)
pw.print("\t");
pw.print(ctx.getContig());
pw.print("\t");
pw.print(ctx.getStart());
pw.print("\t");
pw.print(ctx.getReference().getDisplayString());
pw.print("\t");
pw.print(ctx.getAlleles().get(1).getDisplayString());
}
pw.print("\t");
pw.print(CoordMath.getLength(mnv.get(0).getStart(), mnv.get(1).getEnd()));
int case_cis = 0;
int case_trans = 0;
int ctrl_cis = 0;
int ctrl_trans = 0;
for (final String sn : samples) {
pw.print("\t");
final Phase phase = mnv.sample2phase.get(sn);
if (phase == null) {
pw.print(".");
continue;
}
pw.print(phase.name());
if (pedigree != null) {
final Sample sample = pedigree.getSampleById(sn);
if (sample == null) {
// nothing
} else if (sample.isAffected()) {
if (phase.equals(Phase.cis)) {
case_cis++;
} else if (phase.equals(Phase.trans)) {
case_trans++;
}
} else if (sample.isUnaffected()) {
if (phase.equals(Phase.cis)) {
ctrl_cis++;
} else if (phase.equals(Phase.trans)) {
ctrl_trans++;
}
}
}
}
if (pedigree != null) {
pw.print("\t");
pw.print(case_cis);
pw.print("\t");
pw.print(case_trans);
pw.print("\t");
pw.print(ctrl_cis);
pw.print("\t");
pw.print(ctrl_trans);
pw.print("\t");
final FisherExactTest fisher = FisherExactTest.compute(case_cis, case_trans, ctrl_cis, ctrl_trans);
pw.print(fisher.getAsDouble());
}
pw.println();
}
pw.flush();
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
Aggregations