Usage example of com.github.lindenb.jvarkit.pedigree.PedigreeParser in the jvarkit project (by lindenb).
Example 1: method run() of class VcfFilterJdk.
/**
 * Generates a Java class embedding the user's script, compiles it in memory,
 * instantiates it against the VCF header, and streams every variant of
 * {@code iter} through it, writing the surviving/annotated variants to {@code out}.
 *
 * The user's script may return: a VariantContext, an array/Collection of
 * VariantContext, a Boolean, a Number (1 = accept), or null (reject).
 *
 * @param iter input VCF iterator; its header is read before any variant
 * @param out  destination writer; receives a modified copy of the header
 * @return 0 on success, -1 on any error (errors are logged, never thrown)
 */
private int run(final VCFIterator iter, final VariantContextWriter out) {
ProgressFactory.Watcher<VariantContext> progress = null;
String code = null;
try {
// user code comes either from a script file or from the inline expression
if (this.scriptPath != null) {
code = IOUtils.slurpPath(this.scriptPath);
} else {
code = this.scriptExpr;
}
// random suffix so repeated compilations in one JVM don't collide on the class name
final Random rand = new Random(System.currentTimeMillis());
final String javaClassName = VcfFilterJdk.class.getSimpleName() + "Custom" + Math.abs(rand.nextInt());
final String generatedClassName = OpenJdkCompiler.getGeneratedAnnotationClassName();
// build the source text of the generated filter class
final StringWriter codeWriter = new StringWriter();
final PrintWriter pw = new PrintWriter(codeWriter);
pw.println("import java.util.*;");
pw.println("import java.util.stream.*;");
pw.println("import java.util.function.*;");
pw.println("import htsjdk.samtools.util.*;");
pw.println("import htsjdk.variant.variantcontext.*;");
pw.println("import htsjdk.variant.vcf.*;");
// tag the generated source with an @Generated-style annotation when the JDK provides one
if (!StringUtil.isBlank(generatedClassName)) {
pw.println("@" + generatedClassName + "(value=\"" + VcfFilterJdk.class.getSimpleName() + "\",date=\"" + new Iso8601Date(new Date()) + "\")");
}
// '$' -> '.' so a nested AbstractFilter class name is valid Java source
pw.println("public class " + javaClassName + " extends " + AbstractFilter.class.getName().replace('$', '.') + " {");
pw.println(" public " + javaClassName + "(final VCFHeader header) {");
pw.println(" super(header);");
pw.println(" }");
if (this.user_code_is_body) {
// user supplies whole class-body members (methods, fields, apply() override...)
pw.println(" /** user's code starts here */");
pw.println(code);
pw.println("/** user's code ends here */");
} else {
// user supplies only the body of apply(VariantContext)
pw.println(" @Override");
pw.println(" public Object apply(final VariantContext " + getVariantVariableName() + ") {");
pw.println(" /** user's code starts here */");
pw.println(code);
pw.println("/** user's code ends here */");
pw.println(" }");
}
pw.println("}");
pw.flush();
if (!this.hideGeneratedCode) {
LOG.debug(" Compiling :\n" + OpenJdkCompiler.beautifyCode(codeWriter.toString()));
}
// optionally persist the generated source for debugging
if (this.saveCodeInDir != null) {
BufferedWriter cw = null;
try {
IOUtil.assertDirectoryIsWritable(this.saveCodeInDir);
cw = Files.newBufferedWriter(this.saveCodeInDir.resolve(javaClassName + ".java"));
cw.write(codeWriter.toString());
cw.flush();
cw.close();
// null so the finally-close becomes a no-op after a successful close
cw = null;
LOG.info("saved " + javaClassName + ".java in " + this.saveCodeInDir);
} catch (final Exception err) {
throw new RuntimeIOException(err);
} finally {
CloserUtil.close(cw);
}
}
// compile in memory and look up the (VCFHeader) constructor declared above
final OpenJdkCompiler compiler = OpenJdkCompiler.getInstance();
final Class<?> compiledClass = compiler.compileClass(javaClassName, codeWriter.toString());
final Constructor<?> constructor = compiledClass.getDeclaredConstructor(VCFHeader.class);
final VCFHeader header = iter.getHeader();
final AbstractFilter filter_instance;
;
/* change header */
final VCFHeader h2 = new VCFHeader(header);
/**
 * recalculate INFO/AF, INFO/AN ... variables and 'add'
 */
final Consumer<VariantContext> recalcAndAdd;
if (this.recalcAttributes && header.hasGenotypingData()) {
final VariantAttributesRecalculator recalculator = new VariantAttributesRecalculator();
recalculator.setHeader(h2);
recalcAndAdd = V -> out.add(recalculator.apply(V));
} else {
recalcAndAdd = V -> out.add(V);
}
// blank filteredTag means "drop rejected variants" instead of FILTER-flagging them
final VCFFilterHeaderLine filterHeaderLine = StringUtils.isBlank(filteredTag) ? null : new VCFFilterHeaderLine(this.filteredTag.trim(), "Filtered with " + VcfFilterJdk.class.getSimpleName());
if (filterHeaderLine != null) {
h2.addMetaDataLine(filterHeaderLine);
}
// add genotype filter key if missing
if (h2.getFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY) == null) {
h2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY, true));
}
// declare any extra user-specified FILTER names (comma/space/semicolon separated)
for (final String xf : Arrays.stream(this.extraFilters.split("[ ,;]+")).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet())) {
h2.addMetaDataLine(new VCFFilterHeaderLine(xf, "Custom FILTER inserted with " + VcfFilterJdk.class.getSimpleName()));
}
// instantiate the compiled filter; the constructor receives the ORIGINAL header
try {
filter_instance = (AbstractFilter) constructor.newInstance(header);
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
/* end change header */
JVarkitVersion.getInstance().addMetaData(this, h2);
out.writeHeader(h2);
if (this.pedigreePath != null) {
filter_instance.pedigree = new PedigreeParser().parse(this.pedigreePath);
}
// userData flags exposed to the user's script; "STOP" may be set by the script to abort
filter_instance.userData.put("first.variant", Boolean.TRUE);
filter_instance.userData.put("last.variant", Boolean.FALSE);
progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
while (iter.hasNext() && !out.checkError()) {
final VariantContext variation = progress.apply(iter.next());
/* handle variant */
final Object result = filter_instance.apply(variation);
// result is an array of a collection of variants
if (result != null && (result.getClass().isArray() || (result instanceof Collection))) {
final Collection<?> col;
if (result.getClass().isArray()) {
final Object[] array = (Object[]) result;
col = Arrays.asList(array);
} else {
col = (Collection<?>) result;
}
// write all of variants
for (final Object item : col) {
if (item == null)
throw new JvarkitException.UserError("item in array is null");
if (!(item instanceof VariantContext))
throw new JvarkitException.UserError("item in array is not a VariantContext " + item.getClass());
recalcAndAdd.accept(VariantContext.class.cast(item));
}
} else // result is a VariantContext
if (result != null && (result instanceof VariantContext)) {
recalcAndAdd.accept(VariantContext.class.cast(result));
} else {
// result is interpreted as accept/reject: Boolean, Number (1 == accept) or null
boolean accept = true;
if (result == null) {
accept = false;
} else if (result instanceof Boolean) {
if (Boolean.FALSE.equals(result))
accept = false;
} else if (result instanceof Number) {
if (((Number) result).intValue() != 1)
accept = false;
} else {
LOG.warn("Script returned something that is not a boolean or a number:" + result.getClass());
accept = false;
}
if (!accept) {
// rejected: either FILTER-tag it (when a tag was configured) or drop it
if (filterHeaderLine != null) {
final VariantContextBuilder vcb = new VariantContextBuilder(variation);
vcb.filter(filterHeaderLine.getID());
recalcAndAdd.accept(vcb.make());
}
continue;
}
// set PASS filter if needed
if (filterHeaderLine != null && !variation.isFiltered()) {
recalcAndAdd.accept(new VariantContextBuilder(variation).passFilters().make());
continue;
}
recalcAndAdd.accept(variation);
}
/* end handle variant */
filter_instance.userData.put("first.variant", Boolean.FALSE);
filter_instance.userData.put("last.variant", !iter.hasNext());
// the user's script may request early termination by putting STOP=true in userData
final Object stop = filter_instance.userData.get("STOP");
if (Boolean.TRUE.equals(stop))
break;
}
progress.close();
progress = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
code = null;
CloserUtil.close(progress);
}
}
Usage example of com.github.lindenb.jvarkit.pedigree.PedigreeParser in the jvarkit project (by lindenb).
Example 2: method read() of class GroupByGene.
/**
 * Reads a VCF (file or stdin), groups variant calls by gene (using the configured
 * gene extractors), then writes one tab-delimited report row per gene with
 * per-sample counts and optional pedigree/Fisher columns.
 *
 * @param input path/URL of the VCF, or null to read from stdin
 * @throws IOException on read failure
 */
private void read(final String input) throws IOException {
SortingCollection<Call> sortingCollection = null;
VCFIterator vcfIterator = null;
try {
final BcfIteratorBuilder iterbuilder = new BcfIteratorBuilder();
vcfIterator = (input == null ? iterbuilder.open(stdin()) : iterbuilder.open(input));
final VCFHeader header = vcfIterator.getHeader();
// a sequence dictionary is required to order contigs deterministically
this.contigDictComparator = new ContigDictComparator(SequenceDictionaryUtils.extractRequired(header));
// disk-backed sort of (gene, variant) calls so grouping works on any input order
sortingCollection = SortingCollection.newInstance(Call.class, new CallCodec(header), (C1, C2) -> C1.compare2(C2), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sortingCollection.setDestructiveIteration(true);
final GeneExtractorFactory geneExtractorFactory = new GeneExtractorFactory(header);
final List<GeneExtractorFactory.GeneExtractor> geneExtractors = geneExtractorFactory.parse(this.extractorsNames);
final List<String> sampleNames;
if (header.getSampleNamesInOrder() != null) {
sampleNames = header.getSampleNamesInOrder();
} else {
sampleNames = Collections.emptyList();
}
// optional pedigree: enables the cases/controls/male/female and fisher columns
final Pedigree pedigree;
if (this.pedigreePath != null) {
final PedigreeParser pedParser = new PedigreeParser();
pedigree = pedParser.parse(this.pedigreePath);
} else {
pedigree = PedigreeParser.empty();
}
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
while (vcfIterator.hasNext()) {
final VariantContext ctx = progress.apply(vcfIterator.next());
if (!ctx.isVariant())
continue;
if (ignore_filtered && ctx.isFiltered())
continue;
// simplify line
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.noID();
vcb.log10PError(VariantContext.NO_LOG10_PERROR);
vcb.unfiltered();
vcb.attributes(Collections.emptyMap());
final VariantContext ctx2 = vcb.make();
// effectively-final alias so the lambda below can reference the sorter
final SortingCollection<Call> finalSorter = sortingCollection;
// one Call per (gene, variant) pair; extraction uses the ORIGINAL ctx, storage the stripped ctx2
geneExtractors.stream().flatMap(EX -> EX.apply(ctx).keySet().stream()).forEach(KG -> {
final Call c = new Call();
c.ctx = ctx2;
c.gene = new GeneName(KG.getKey(), KG.getGene(), KG.getMethod());
finalSorter.add(c);
});
}
CloserUtil.close(vcfIterator);
vcfIterator = null;
sortingCollection.doneAdding();
progress.close();
/**
 * dump
 */
// restrict pedigree-derived sample sets to samples actually present in the VCF
final Set<String> casesSamples = pedigree.getAffectedSamples().stream().map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
final Set<String> controlsSamples = pedigree.getUnaffectedSamples().stream().map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
final Set<String> maleSamples = pedigree.getSamples().stream().filter(P -> P.isMale()).map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
final Set<String> femaleSamples = pedigree.getSamples().stream().filter(P -> P.isFemale()).map(P -> P.getId()).filter(ID -> sampleNames.contains(ID)).collect(Collectors.toSet());
// a genotype "carries" a variant if called, not hom-ref, and (optionally) not FILTERed
final Predicate<Genotype> genotypeFilter = genotype -> {
if (!genotype.isAvailable())
return false;
if (!genotype.isCalled())
return false;
if (genotype.isNoCall())
return false;
if (genotype.isHomRef())
return false;
if (this.ignore_filtered_genotype && genotype.isFiltered())
return false;
return true;
};
// NOTE(review): pw is not closed in a finally/try-with-resources; it leaks if an
// exception is thrown while writing — TODO confirm/fix upstream.
PrintStream pw = openPathOrStdoutAsPrintStream(this.outFile);
// ---- header row ----
pw.print("#chrom");
pw.print('\t');
pw.print("min.POS");
pw.print('\t');
pw.print("max.POS");
pw.print('\t');
pw.print("gene.name");
pw.print('\t');
pw.print("gene.label");
pw.print('\t');
pw.print("gene.type");
pw.print('\t');
pw.print("samples.affected");
pw.print('\t');
pw.print("count.variations");
if (this.print_positions) {
pw.print('\t');
pw.print("positions");
}
// optional columns mirror the data columns emitted per row below
if (!casesSamples.isEmpty()) {
pw.print('\t');
pw.print("pedigree.cases");
}
if (!controlsSamples.isEmpty()) {
pw.print('\t');
pw.print("pedigree.controls");
}
if (!maleSamples.isEmpty()) {
pw.print('\t');
pw.print("pedigree.males");
}
if (!femaleSamples.isEmpty()) {
pw.print('\t');
pw.print("pedigree.females");
}
if (this.print_fisher && !controlsSamples.isEmpty() && !casesSamples.isEmpty()) {
pw.print('\t');
pw.print("fisher");
}
for (final String sample : sampleNames) {
pw.print('\t');
pw.print(sample);
}
pw.println();
// ---- one row per gene: equal-range iteration over the sorted calls ----
final CloseableIterator<Call> iter = sortingCollection.iterator();
final EqualRangeIterator<Call> eqiter = new EqualRangeIterator<>(iter, (C1, C2) -> C1.compareTo(C2));
while (eqiter.hasNext()) {
final List<Call> row = eqiter.next();
final Call first = row.get(0);
final List<VariantContext> variantList = row.stream().map(R -> R.ctx).collect(Collectors.toList());
// row is never empty here, so min()/max() are always present
final int minPos = variantList.stream().mapToInt(R -> R.getStart()).min().getAsInt();
final int maxPos = variantList.stream().mapToInt(R -> R.getEnd()).max().getAsInt();
final Set<String> sampleCarryingMut = new HashSet<String>();
final Counter<String> pedCasesCarryingMut = new Counter<String>();
final Counter<String> pedCtrlsCarryingMut = new Counter<String>();
final Counter<String> malesCarryingMut = new Counter<String>();
final Counter<String> femalesCarryingMut = new Counter<String>();
final Counter<String> sample2count = new Counter<String>();
// tally carrier samples across all variants of this gene
for (final VariantContext ctx : variantList) {
for (final Genotype genotype : ctx.getGenotypes()) {
if (!genotypeFilter.test(genotype))
continue;
final String sampleName = genotype.getSampleName();
sample2count.incr(sampleName);
sampleCarryingMut.add(sampleName);
if (casesSamples.contains(sampleName)) {
pedCasesCarryingMut.incr(sampleName);
}
if (controlsSamples.contains(sampleName)) {
pedCtrlsCarryingMut.incr(sampleName);
}
if (maleSamples.contains(sampleName)) {
malesCarryingMut.incr(sampleName);
}
if (femaleSamples.contains(sampleName)) {
femalesCarryingMut.incr(sampleName);
}
}
}
pw.print(first.getContig());
pw.print('\t');
// convert to bed
pw.print(minPos - 1);
pw.print('\t');
pw.print(maxPos);
pw.print('\t');
pw.print(first.gene.name);
pw.print('\t');
pw.print(first.gene.label);
pw.print('\t');
pw.print(first.gene.type);
pw.print('\t');
pw.print(sampleCarryingMut.size());
pw.print('\t');
pw.print(variantList.size());
if (this.print_positions) {
pw.print('\t');
// "start:allele1/allele2;start:..." summary of the variants in the gene
pw.print(variantList.stream().map(CTX -> String.valueOf(CTX.getStart()) + ":" + CTX.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining("/"))).collect(Collectors.joining(";")));
}
if (!casesSamples.isEmpty()) {
pw.print('\t');
pw.print(pedCasesCarryingMut.getCountCategories());
}
if (!controlsSamples.isEmpty()) {
pw.print('\t');
pw.print(pedCtrlsCarryingMut.getCountCategories());
}
if (!maleSamples.isEmpty()) {
pw.print('\t');
pw.print(malesCarryingMut.getCountCategories());
}
if (!femaleSamples.isEmpty()) {
pw.print('\t');
pw.print(femalesCarryingMut.getCountCategories());
}
if (this.print_fisher && !controlsSamples.isEmpty() && !casesSamples.isEmpty()) {
// 2x2 contingency: case/control vs carries-mutation/wild-type
int count_case_mut = 0;
int count_ctrl_mut = 0;
int count_case_wild = 0;
int count_ctrl_wild = 0;
for (final String sampleName : header.getSampleNamesInOrder()) {
final boolean has_mutation = variantList.stream().map(V -> V.getGenotype(sampleName)).anyMatch(G -> G != null && genotypeFilter.test(G));
if (controlsSamples.contains(sampleName)) {
if (has_mutation) {
count_ctrl_mut++;
} else {
count_ctrl_wild++;
}
} else if (casesSamples.contains(sampleName)) {
if (has_mutation) {
count_case_mut++;
} else {
count_case_wild++;
}
}
}
final FisherExactTest fisher = FisherExactTest.compute(count_case_mut, count_case_wild, count_ctrl_mut, count_ctrl_wild);
pw.print('\t');
pw.print(fisher.getAsDouble());
}
for (final String sample : sampleNames) {
pw.print('\t');
pw.print(sample2count.count(sample));
}
pw.println();
// stop early if the output stream reported an error (e.g. broken pipe)
if (pw.checkError())
break;
}
eqiter.close();
iter.close();
pw.flush();
// stdout must stay open; only close when writing to a real file
if (this.outFile != null)
pw.close();
} finally {
CloserUtil.close(vcfIterator);
if (sortingCollection != null)
sortingCollection.cleanup();
}
}
Usage example of com.github.lindenb.jvarkit.pedigree.PedigreeParser in the jvarkit project (by lindenb).
Example 3: method doWork() of class CoverageServer.
/**
 * Validates CLI parameters, loads the BAM inputs and (optional) pedigree and
 * interval sources, then starts an embedded Jetty server serving the coverage
 * pages and blocks until the server is stopped (Ctrl-C).
 *
 * @param args positional arguments: BAM paths (unrolled, i.e. list files allowed)
 * @return 0 on normal server shutdown, -1 on any validation or runtime error
 */
@Override
public int doWork(final List<String> args) {
// ---- sanity checks on display parameters ----
if (this.image_width < 10) {
LOG.error("low image width");
return -1;
}
if (this.image_height < 10) {
LOG.error("low image height");
return -1;
}
if (this.images_per_row < 1) {
LOG.error("low images_per_row");
return -1;
}
if (this.extend_factor <= 0) {
LOG.error("bad extend_factor " + this.extend_factor);
return -1;
}
try {
this.bamInput.addAll(IOUtils.unrollPaths(args).stream().map(F -> new BamInput(F)).collect(Collectors.toList()));
if (this.bamInput.isEmpty()) {
LOG.error("No BAM defined.");
return -1;
}
this.dictionary = SequenceDictionaryUtils.extractRequired(this.faidxRef);
// every BAM must share the reference dictionary; sample name comes from the
// first non-blank read-group sample, falling back to the file name
for (final BamInput bi : this.bamInput) {
final SamReaderFactory srf = SamReaderFactory.make().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.faidxRef);
try (SamReader sr = srf.open(bi.bamPath)) {
final SAMFileHeader header = sr.getFileHeader();
SequenceUtil.assertSequenceDictionariesEqual(this.dictionary, SequenceDictionaryUtils.extractRequired(header));
bi.sample = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bi.bamPath));
}
}
if (this.pedigreePath != null) {
this.pedigree = new PedigreeParser().parse(this.pedigreePath);
}
// optional BED of intervals to review; contig names are remapped onto the
// reference dictionary and unmappable contigs are silently skipped
if (this.intervalsource != null) {
final ContigNameConverter cvt = ContigNameConverter.fromOneDictionary(this.dictionary);
final BedLineCodec codec = new BedLineCodec();
try (BufferedReader br = IOUtils.openPathForBufferedReading(this.intervalsource)) {
br.lines().filter(L -> !BedLine.isBedHeader(L)).map(L -> codec.decode(L)).filter(B -> B != null).map(B -> new ReviewedInterval(new SimpleInterval(B.getContig(), B.getStart(), B.getEnd()), B.getOrDefault(3, ""))).map(B -> {
final String ctg = cvt.apply(B.getContig());
if (StringUtils.isBlank(ctg))
return null;
if (ctg.equals(B.getContig()))
return B;
return new ReviewedInterval(new SimpleInterval(ctg, B.getStart(), B.getEnd()), B.getName());
}).filter(B -> B != null).forEach(B -> named_intervals.add(B));
}
}
// additional intervals from the generic interval-list provider (unnamed)
this.intervalListProvider.dictionary(this.dictionary).skipUnknownContigs().stream().map(L -> new Interval(L)).forEach(B -> named_intervals.add(new ReviewedInterval(B, "")));
// ---- embedded Jetty setup: one servlet handles all paths ----
final Server server = new Server(this.serverPort);
final ServletContextHandler context = new ServletContextHandler();
context.addServlet(new ServletHolder(new CoverageServlet()), "/*");
context.setContextPath("/");
context.setResourceBase(".");
server.setHandler(context);
LOG.info("Starting server " + getProgramName() + " on port " + this.serverPort);
server.start();
LOG.info("Server started. Press Ctrl-C to stop. Check your proxy settings ." + " Open a web browser at http://localhost:" + this.serverPort + "/coverage .");
// block until the server thread terminates
server.join();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
Usage example of com.github.lindenb.jvarkit.pedigree.PedigreeParser in the jvarkit project (by lindenb).
Example 4: method doWork() of class BamToMNV.
/**
 * Detects multi-nucleotide variants (MNVs): pairs of nearby biallelic SNVs whose
 * phase (cis/trans) is resolved from BAM reads, then writes a tab-delimited
 * report (one row per phased SNV pair, one column per sample), with optional
 * case/control counts and a Fisher test when a pedigree is supplied.
 *
 * @param args exactly one VCF file (the BAMs come from the --input_bams option)
 * @return 0 on success, -1 on error (errors are logged, never thrown)
 */
@Override
public int doWork(final List<String> args) {
try {
final List<Path> bams = IOUtils.unrollPaths(this.input_bams);
if (bams.isEmpty()) {
LOG.error("No bam was defined");
return -1;
}
final Pedigree pedigree;
if (this.pedigreePath != null) {
pedigree = new PedigreeParser().parse(this.pedigreePath);
pedigree.checkUniqIds();
} else {
pedigree = null;
}
// load all biallelic SNVs (genotypes dropped); OrderChecker enforces sort order
try (VCFReader reader = VCFReaderFactory.makeDefault().open(oneAndOnlyOneFile(args), false)) {
final VCFHeader header = reader.getHeader();
final OrderChecker<VariantContext> order = new OrderChecker<>(header.getSequenceDictionary(), false);
try (CloseableIterator<VariantContext> r = reader.iterator()) {
this.all_variants.addAll(r.stream().filter(V -> V.isBiallelic() && V.isSNP()).map(V -> new VariantContextBuilder(V).noGenotypes().make()).map(order).collect(Collectors.toList()));
}
}
// candidate MNVs = ordered pairs of distinct-position SNVs within min_distance_mnv;
// the inner loop breaks early because variants are coordinate-sorted
final List<Mnv> all_mnvs = new ArrayList<>();
for (int i = 0; i + 1 < this.all_variants.size(); i++) {
final VariantContext v1 = this.all_variants.get(i);
for (int j = i + 1; j < this.all_variants.size(); j++) {
final VariantContext v2 = this.all_variants.get(j);
if (!v1.withinDistanceOf(v2, min_distance_mnv))
break;
if (v1.getStart() == v2.getStart())
continue;
all_mnvs.add(new Mnv(i, j));
}
}
final Set<String> samples = new TreeSet<>();
final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.faidx);
// ---- per-BAM phasing: resolve each candidate MNV from the reads ----
for (final Path bam : bams) {
LOG.info(String.valueOf(bam));
try (SamReader sr = srf.open(bam)) {
final SAMFileHeader header = sr.getFileHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
// sample name: first non-blank read-group sample, else the file name
final String sample = header.getReadGroups().stream().map(R -> R.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(bam));
if (samples.contains(sample)) {
LOG.error("duplicate sample " + sample);
return -1;
}
samples.add(sample);
if (pedigree != null && pedigree.getSampleById(sample) == null) {
LOG.warn("sample " + sample + " from " + bam + " is not in pedigree.");
}
if (all_mnvs.isEmpty())
continue;
// fetch all reads overlapping any of the SNVs in one optimized query
final QueryInterval[] intervals = QueryInterval.optimizeIntervals(this.all_variants.stream().map(V -> new QueryInterval(dict.getSequenceIndex(V.getContig()), V.getStart(), V.getEnd())).toArray(X -> new QueryInterval[X]));
final List<SAMRecord> sam_reads = new ArrayList<>();
try (CloseableIterator<SAMRecord> iter = sr.query(intervals, false)) {
while (iter.hasNext()) {
final SAMRecord record = iter.next();
if (!SAMRecordDefaultFilter.accept(record, this.minmapq))
continue;
if (record.getReadBases() == SAMRecord.NULL_SEQUENCE)
continue;
sam_reads.add(record);
}
}
// sort on query name
Collections.sort(sam_reads, (A, B) -> A.getReadName().compareTo(B.getReadName()));
// record only resolved phases; ambiguous pairs are left out for this sample
for (final Mnv mnv : all_mnvs) {
final Phase phase = mnv.getPhase(sam_reads);
if (phase.equals(Phase.ambigous))
continue;
mnv.sample2phase.put(sample, phase);
}
}
}
// ---- report ----
try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
pw.print("#CHROM1\tPOS1\tREF1\tALT1");
pw.print("\tCHROM2\tPOS2\tREF2\tALT2");
pw.print("\tdistance");
for (final String sn : samples) pw.print("\t" + sn);
if (pedigree != null) {
pw.print("\tcase-cis\tcase-trans\tcontrol-cis\tcontrol-trans\tfisher");
}
pw.println();
for (final Mnv mnv : all_mnvs) {
// skip pairs with no informative (cis/trans) sample at all
if (mnv.sample2phase.values().stream().allMatch(V -> V.equals(Phase.ambigous) || V.equals(Phase.ref)))
continue;
// the two SNVs of the pair, side 0 then side 1
for (int side = 0; side < 2; ++side) {
final VariantContext ctx = mnv.get(side);
if (side > 0)
pw.print("\t");
pw.print(ctx.getContig());
pw.print("\t");
pw.print(ctx.getStart());
pw.print("\t");
pw.print(ctx.getReference().getDisplayString());
pw.print("\t");
// biallelic: index 1 is the single ALT allele
pw.print(ctx.getAlleles().get(1).getDisplayString());
}
pw.print("\t");
pw.print(CoordMath.getLength(mnv.get(0).getStart(), mnv.get(1).getEnd()));
int case_cis = 0;
int case_trans = 0;
int ctrl_cis = 0;
int ctrl_trans = 0;
// per-sample phase column; also accumulate case/control cis/trans tallies
for (final String sn : samples) {
pw.print("\t");
final Phase phase = mnv.sample2phase.get(sn);
if (phase == null) {
pw.print(".");
continue;
}
pw.print(phase.name());
if (pedigree != null) {
final Sample sample = pedigree.getSampleById(sn);
if (sample == null) {
// nothing
} else if (sample.isAffected()) {
if (phase.equals(Phase.cis)) {
case_cis++;
} else if (phase.equals(Phase.trans)) {
case_trans++;
}
} else if (sample.isUnaffected()) {
if (phase.equals(Phase.cis)) {
ctrl_cis++;
} else if (phase.equals(Phase.trans)) {
ctrl_trans++;
}
}
}
}
if (pedigree != null) {
pw.print("\t");
pw.print(case_cis);
pw.print("\t");
pw.print(case_trans);
pw.print("\t");
pw.print(ctrl_cis);
pw.print("\t");
pw.print(ctrl_trans);
pw.print("\t");
// 2x2 Fisher exact: (case cis, case trans) vs (control cis, control trans)
final FisherExactTest fisher = FisherExactTest.compute(case_cis, case_trans, ctrl_cis, ctrl_trans);
pw.print(fisher.getAsDouble());
}
pw.println();
}
pw.flush();
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
Usage example of com.github.lindenb.jvarkit.pedigree.PedigreeParser in the jvarkit project (by lindenb).
Example 5: method doVcfToVcf() of class VcfAlleleBalance.
/**
 * Annotates each variant with allele-balance statistics (INFO/AB_HET, AB_HOM,
 * non-diploid ratio), optionally stratified into case/control sets from a pedigree.
 * When FORMAT/AD is absent but a 4-integer FORMAT/DP4 is declared, AD is
 * reconstructed from DP4 for biallelic sites.
 *
 * @param inputName name of the input (unused here, kept for the interface)
 * @param iterin    source VCF iterator
 * @param out       destination writer
 * @return 0 on success, -1 if the pedigree cannot be parsed
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iterin, final VariantContextWriter out) {
final VCFHeader header = iterin.getHeader();
// optional pedigree: enables the CASE_/CTRL_ stratified INFO fields
if (this.pedigreeFile != null) {
try {
this.pedigree = new PedigreeParser().parse(this.pedigreeFile);
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
} else {
this.pedigree = PedigreeParser.empty();
}
final Set<String> cases_samples = this.pedigree.getAffectedSamples().stream().map(P -> P.getId()).collect(Collectors.toSet());
final Set<String> ctr_samples = this.pedigree.getUnaffectedSamples().stream().map(P -> P.getId()).collect(Collectors.toSet());
boolean use_dp4_if_ad_missing = false;
final VCFHeader header2 = new VCFHeader(header);
if (header.hasGenotypingData()) {
if (header.getFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS) == null) {
// AD is missing: declare it, and fall back to DP4 if that format fits (4 fixed Integers)
LOG.error("header is missing FORMAT/" + VCFConstants.GENOTYPE_ALLELE_DEPTHS);
header2.addMetaDataLine(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS));
final VCFFormatHeaderLine dp4h = header.getFormatHeaderLine("DP4");
if (dp4h != null && dp4h.isFixedCount() && dp4h.getCount() == 4 && dp4h.getType() == VCFHeaderLineType.Integer) {
LOG.warn("I will use FORMAT/DP4 instead of FORMAT/AD");
use_dp4_if_ad_missing = true;
}
}
// declare INFO lines: unstratified only, or unstratified + CASE_ + CTRL_ when a pedigree is present
for (int i = 0; i < (this.pedigree.isEmpty() ? 1 : 3); i++) {
final String prefix;
switch(i) {
case 0:
prefix = "";
break;
case 1:
prefix = CASE_PREFIX;
break;
default:
prefix = CTRL_PREFIX;
break;
}
String key = prefix + ALLELE_BALANCE_HET_KEY;
if (header.getInfoHeaderLine(key) != null) {
LOG.warn("header already contains INFO/" + key);
} else {
header2.addMetaDataLine(new VCFInfoHeaderLine(key, 1, VCFHeaderLineType.Float, prefix.replace("_", ":") + "Allele Balance for heterozygous calls (ref/(ref+alt))"));
}
key = prefix + ALLELE_BALANCE_HOM_KEY;
if (header.getInfoHeaderLine(key) != null) {
LOG.warn("header already contains INFO/" + key);
} else {
header2.addMetaDataLine(new VCFInfoHeaderLine(key, 1, VCFHeaderLineType.Float, prefix.replace("_", ":") + "Allele Balance for homozygous calls (A/(A+O)) where A is the allele (ref or alt) and O is anything other"));
}
key = prefix + NON_DIPLOID_RATIO_KEY;
if (header.getInfoHeaderLine(key) != null) {
LOG.warn("header already contains INFO/" + key);
} else {
header2.addMetaDataLine(new VCFInfoHeaderLine(key, 1, VCFHeaderLineType.Float, prefix.replace("_", ":") + "Overall non-diploid ratio (alleles/(alleles+non-alleles))"));
}
}
}
JVarkitVersion.getInstance().addMetaData(VcfAlleleBalance.class.getSimpleName(), header2);
out.writeHeader(header2);
while (iterin.hasNext()) {
VariantContext ctx = iterin.next();
// pass-through when there is nothing to compute on
if (!(ctx.hasGenotypes() || ctx.isBiallelic())) {
out.add(ctx);
continue;
}
// reconstruct FORMAT/AD from FORMAT/DP4 for biallelic sites when AD is absent
if (use_dp4_if_ad_missing && ctx.getNAlleles() == 2) {
final List<Genotype> newgt = new ArrayList<>(ctx.getNSamples());
for (final Genotype gt : ctx.getGenotypes()) {
if (!gt.hasAnyAttribute("DP4") || gt.hasAD() || gt.isHetNonRef()) {
newgt.add(gt);
} else {
Object o = gt.getAnyAttribute("DP4");
if (o == null || !(o instanceof List) || List.class.cast(o).size() != 4) {
// malformed DP4: keep the original genotype instead of silently dropping it
newgt.add(gt);
continue;
}
final List<?> dp4 = (List<?>) o;
final int[] ad = new int[ctx.getNAlleles()];
Arrays.fill(ad, 0);
// Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases
ad[0] = Integer.class.cast(dp4.get(0)) + Integer.class.cast(dp4.get(1));
ad[1] = Integer.class.cast(dp4.get(2)) + Integer.class.cast(dp4.get(3));
newgt.add(new GenotypeBuilder(gt).AD(ad).make());
}
}
ctx = new VariantContextBuilder(ctx).genotypes(newgt).make();
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// unstratified AB over all genotypes, then per-status when a pedigree was given
computeAB(ctx, vcb, (GT) -> true, "");
if (!pedigree.isEmpty()) {
computeAB(ctx, vcb, (GT) -> cases_samples.contains(GT.getSampleName()), CASE_PREFIX);
computeAB(ctx, vcb, (GT) -> ctr_samples.contains(GT.getSampleName()), CTRL_PREFIX);
}
out.add(vcb.make());
}
return 0;
}
Aggregations