use of com.github.lindenb.jvarkit.pedigree.PedigreeParser in project jvarkit by lindenb.
the class VCFFamilies method doVcfToVcf.
@Override
public int doVcfToVcf(final String inputName, VCFIterator r, final VariantContextWriter w) {
final VCFHeader header = r.getHeader();
final Pedigree pedigree;
try {
pedigree = new PedigreeParser().parse(this.pedigreeFile);
} catch (final Throwable err) {
throw new RuntimeIOException(err);
}
if (pedigree == null || pedigree.isEmpty()) {
throw new RuntimeIOException("Pedigree null/empty");
}
final VCFHeader h2 = new VCFHeader(header);
final Map<String, FamilyInfo> famidToFamilyInfo = new HashMap<>();
pedigree.getSamplesInVcfHeader(header).forEach(P -> {
final Family pedFamily = P.getFamily();
FamilyInfo finfo = famidToFamilyInfo.get(pedFamily.getId());
if (finfo == null) {
finfo = new FamilyInfo(pedFamily);
famidToFamilyInfo.put(pedFamily.getId(), finfo);
}
finfo.samples.add(P.getId());
});
famidToFamilyInfo.values().stream().flatMap(F -> F.getMetaDataLines().stream()).forEach(H -> h2.addMetaDataLine(H));
JVarkitVersion.getInstance().addMetaData(this, h2);
w.writeHeader(h2);
while (r.hasNext()) {
final VariantContext ctx = r.next();
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
final List<Allele> alts = ctx.getAlternateAlleles();
famidToFamilyInfo.values().forEach(F -> F.visit(vcb, ctx, alts));
w.add(vcb.make());
}
w.close();
return 0;
}
use of com.github.lindenb.jvarkit.pedigree.PedigreeParser in project jvarkit by lindenb.
the class VCFTrios method doVcfToVcf.
@Override
public int doVcfToVcf(final String inputName, VCFIterator r, final VariantContextWriter w) {
long count_incompats = 0L;
final Set<String> sampleNotFoundInVcf = new HashSet<>();
Pedigree pedigree = null;
final List<TrioTriple> trios = new ArrayList<>();
try {
final DeNovoDetector detector = new DeNovoDetector();
detector.setConvertingNoCallToHomRef(this.nocall_to_homref);
final VCFHeader header = r.getHeader();
final PedigreeParser pedParser = new PedigreeParser();
pedigree = pedParser.parse(this.pedigreeFile);
final VCFHeader h2 = new VCFHeader(header);
final Set<VCFHeaderLine> meta = new HashSet<>();
meta.add(new VCFInfoHeaderLine(this.attributeName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples with mendelian incompatibilities." + (this.pedigreeFile == null ? "" : " Pedigree File was : " + this.pedigreeFile)));
meta.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY, true));
if (!StringUtil.isBlank(this.filterAnyIncompat)) {
meta.add(new VCFFilterHeaderLine(this.filterAnyIncompat, "Variant contains at least one mendelian incompatibilities"));
}
if (!StringUtil.isBlank(this.filterNoIncompat)) {
meta.add(new VCFFilterHeaderLine(this.filterNoIncompat, "Variant does not contain any mendelian incompatibilities"));
}
meta.stream().forEach(H -> h2.addMetaDataLine(H));
JVarkitVersion.getInstance().addMetaData(this, h2);
for (final Trio pedTrio : pedigree.getTrios()) {
final TrioTriple trio = new TrioTriple();
final Sample child = pedTrio.getChild();
trio.child_id = header.getSampleNameToOffset().getOrDefault(child.getId(), -1);
if (trio.child_id < 0)
continue;
if (pedTrio.hasFather()) {
final Sample parent = pedTrio.getFather();
trio.father_id = header.getSampleNameToOffset().getOrDefault(parent.getId(), -1);
}
if (pedTrio.hasMother()) {
final Sample parent = pedTrio.getMother();
trio.mother_id = header.getSampleNameToOffset().getOrDefault(parent.getId(), -1);
}
if (trio.father_id == -1 && trio.mother_id == -1) {
continue;
}
trios.add(trio);
}
LOG.info("trios(s) in pedigree: " + trios.size());
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).logger(LOG).build();
w.writeHeader(h2);
while (r.hasNext()) {
final VariantContext ctx = progress.apply(r.next());
final Set<String> incompatibilities = new HashSet<String>();
for (final TrioTriple trio : trios) {
final Genotype gChild = ctx.getGenotype(trio.child_id);
if (gChild == null)
throw new IllegalStateException();
final Genotype gFather = trio.father_id < 0 ? null : ctx.getGenotype(trio.father_id);
final Genotype gMother = trio.mother_id < 0 ? null : ctx.getGenotype(trio.mother_id);
final DeNovoDetector.DeNovoMutation mut = detector.test(ctx, gFather, gMother, gChild);
if (mut != null) {
incompatibilities.add(gChild.getSampleName());
}
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (!incompatibilities.isEmpty()) {
// set filter for samples that are not a mendelian violation
if (!StringUtil.isBlank(this.genotypeFilterNameNoIncompat)) {
vcb.genotypes(ctx.getGenotypes().stream().map(G -> incompatibilities.contains(G.getSampleName()) ? G : new GenotypeBuilder(G).filters(this.genotypeFilterNameNoIncompat).make()).collect(Collectors.toList()));
}
++count_incompats;
// set INFO attribute
vcb.attribute(attributeName, incompatibilities.toArray());
// set FILTER
if (!StringUtil.isBlank(this.filterAnyIncompat)) {
vcb.filter(this.filterAnyIncompat);
} else if (!ctx.isFiltered()) {
vcb.passFilters();
}
} else // No denovo
{
// dicard variant
if (this.discard_variants_without_mendelian_incompat) {
continue;
}
// set filters
if (!StringUtil.isBlank(this.filterNoIncompat)) {
vcb.filter(this.filterNoIncompat);
} else if (!ctx.isFiltered()) {
vcb.passFilters();
}
}
w.add(vcb.make());
}
progress.close();
LOG.info("incompatibilitie(s) N=" + count_incompats);
if (!sampleNotFoundInVcf.isEmpty()) {
LOG.info("SAMPLE(S) not found: " + String.join(" / ", sampleNotFoundInVcf));
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
use of com.github.lindenb.jvarkit.pedigree.PedigreeParser in project jvarkit by lindenb.
the class SwingVcfView method doWork.
@Override
public int doWork(final List<String> args) {
try {
final List<Path> all_vcf_paths = new ArrayList<>(IOUtils.unrollPaths(args));
if (all_vcf_paths.isEmpty()) {
final JFileChooser jfc = new JFileChooser();
jfc.setMultiSelectionEnabled(true);
jfc.setFileFilter(new FileFilter() {
@Override
public String getDescription() {
return "Indexed Variant File";
}
@Override
public boolean accept(final File f) {
if (f.isDirectory())
return true;
if (!f.canRead())
return false;
if (FileExtensions.VCF_LIST.stream().noneMatch(X -> f.getName().endsWith(X)))
return false;
return true;
}
});
if (jfc.showOpenDialog(null) != JFileChooser.APPROVE_OPTION)
return -1;
final File[] sel = jfc.getSelectedFiles();
if (sel == null || sel.length == 0)
return -1;
Arrays.asList(sel).stream().map(F -> F.toPath()).forEach(P -> all_vcf_paths.add(P));
}
for (final Path path : all_vcf_paths) {
IOUtil.assertFileIsReadable(path);
}
/**
* create VCF if needed
*/
for (final Path path : all_vcf_paths) {
try {
ActionIndexVcf.indexVcf(path, false);
} catch (IOException err) {
ThrowablePane.show(null, err);
return -1;
}
}
final Pedigree ped;
if (this.pedigreePath != null) {
ped = new PedigreeParser().parse(this.pedigreePath);
} else {
ped = null;
}
JFrame.setDefaultLookAndFeelDecorated(true);
final Dimension screen = Toolkit.getDefaultToolkit().getScreenSize();
SwingUtilities.invokeAndWait(() -> {
int dx = 0;
for (final Path vcfPath : all_vcf_paths) {
final XFrame frame = new XFrame(vcfPath, defaultRegion, this.limit_number_variant, ped, this.gffPath);
frame.setBounds(50 + dx, 50 + dx, screen.width - 100, screen.height - 100);
frame.setVisible(true);
dx += 2;
}
});
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
}
use of com.github.lindenb.jvarkit.pedigree.PedigreeParser in project jvarkit by lindenb.
the class AbstractVcfBurden method doWork.
@Override
public int doWork(final List<String> args) {
PrintWriter pw = null;
VCFReader vcfReader = null;
VariantContextWriter vcw = null;
try {
this.pedigree = new PedigreeParser().parse(this.pedFile);
this.cases = new HashSet<>(this.pedigree.getAffectedSamples());
this.controls = new HashSet<>(this.pedigree.getUnaffectedSamples());
final String vcfIn = super.oneAndOnlyOneFile(args);
vcfReader = VCFReaderFactory.makeDefault().open(Paths.get(vcfIn), true);
final VCFHeader header = vcfReader.getHeader();
final Set<String> samplesInVcf = new HashSet<>(header.getSampleNamesInOrder());
if (this.outputVcfPath != null) {
vcw = VCFUtils.createVariantContextWriterToPath(this.outputVcfPath);
header.addMetaDataLine(new VCFInfoHeaderLine(BURDEN_KEY, 1, VCFHeaderLineType.String, "Burden key"));
JVarkitVersion.getInstance().addMetaData(this, header);
vcw.writeHeader(header);
}
this.cases.removeIf(S -> !samplesInVcf.contains(S.getId()));
this.controls.removeIf(S -> !samplesInVcf.contains(S.getId()));
if (this.cases.isEmpty()) {
LOG.error("no affected in " + this.pedFile);
return -1;
}
if (this.controls.isEmpty()) {
LOG.error("no controls in " + this.pedFile);
return -1;
}
pw = super.openPathOrStdoutAsPrintWriter(this.outputFile);
runBurden(pw, vcfReader, vcw);
pw.flush();
pw.close();
pw = null;
vcfReader.close();
vcfReader = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcfReader);
CloserUtil.close(pw);
CloserUtil.close(vcw);
}
}
use of com.github.lindenb.jvarkit.pedigree.PedigreeParser in project jvarkit by lindenb.
the class VcfBurdenFisherH method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
final PrintWriter report;
if (this.bedExportPath == null) {
report = new PrintWriter(new NullOuputStream());
} else {
report = new PrintWriter(IOUtil.openFileForBufferedUtf8Writing(this.bedExportPath));
}
final VCFHeader header = r.getHeader();
final VCFHeader h2 = new VCFHeader(header);
final Pedigree pedigree;
try {
pedigree = new PedigreeParser().parse(this.pedigreeFile);
} catch (final IOException error) {
throw new RuntimeIOException(error);
}
final Set<Sample> individualSet = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isStatusSet()).collect(Collectors.toSet());
if (individualSet.isEmpty())
throw new IllegalArgumentException("No overlapping samples between header and pedigree.");
final VCFInfoHeaderLine fisherAlleleInfoHeader = new VCFInfoHeaderLine(this.burdenHFisherTag, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Fisher Exact Test Case/Control.");
final VCFFilterHeaderLine variantFilterHeader;
if (StringUtils.isBlank(this.softFilterName)) {
variantFilterHeader = null;
} else {
variantFilterHeader = new VCFFilterHeaderLine(this.softFilterName, "Fisher case:control vs miss|have ALT not between " + this.min_fisher + " and " + this.max_fisher);
h2.addMetaDataLine(variantFilterHeader);
}
final VCFFilterHeaderLine filterCtrlgtCaseRatio;
if (StringUtils.isBlank(this.filterCtrlgtCaseRatioStr)) {
filterCtrlgtCaseRatio = null;
} else {
filterCtrlgtCaseRatio = new VCFFilterHeaderLine(this.filterCtrlgtCaseRatioStr, "The number of CONTROLS carrying the ALT allele is creater than the number of CASES carrying the ALT allele.");
h2.addMetaDataLine(filterCtrlgtCaseRatio);
}
final VCFInfoHeaderLine fisherDetailInfoHeader = new VCFInfoHeaderLine(this.burdenHFisherTag + "Detail", VCFHeaderLineCount.A, VCFHeaderLineType.String, "Fisher Exact Test Case/Control");
h2.addMetaDataLine(fisherAlleleInfoHeader);
h2.addMetaDataLine(fisherDetailInfoHeader);
JVarkitVersion.getInstance().addMetaData(this, h2);
w.writeHeader(h2);
while (r.hasNext()) {
final VariantContext ctx = r.next();
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (this.overwrite_qual)
vcb.log10PError(VariantContext.NO_LOG10_PERROR);
vcb.rmAttribute(fisherAlleleInfoHeader.getID());
vcb.rmAttribute(fisherDetailInfoHeader.getID());
final Set<String> oldFilters = new HashSet<>(ctx.getFilters());
if (variantFilterHeader != null)
oldFilters.remove(variantFilterHeader.getID());
if (filterCtrlgtCaseRatio != null)
oldFilters.remove(filterCtrlgtCaseRatio.getID());
if (this.ignoreFiltered && ctx.isFiltered()) {
w.add(vcb.make());
continue;
}
boolean set_filter_in_range = true;
boolean set_filter_case_ctrl_ratio = true;
boolean found_one_alt = false;
final List<String> infoData = new ArrayList<>(ctx.getAlleles().size());
final List<Double> fisherValues = new ArrayList<>(ctx.getAlleles().size());
for (final Allele observed_alt : ctx.getAlternateAlleles()) {
if (observed_alt.isNoCall()) {
infoData.add(String.join("|", "ALLELE", String.valueOf(observed_alt.getDisplayString()), "FISHER", "1.0"));
fisherValues.add(1.0);
continue;
}
/* count for fisher allele */
int count_case_have_alt = 0;
int count_case_miss_alt = 0;
int count_ctrl_have_alt = 0;
int count_ctrl_miss_alt = 0;
/* loop over persons in this pop */
for (final Sample p : individualSet) {
/* get genotype for this individual */
final Genotype genotype = ctx.getGenotype(p.getId());
/* individual is not in vcf header */
if (genotype == null || !genotype.isCalled() || (this.ignore_filtered_genotype && genotype.isFiltered())) {
if (genotype == null)
LOG.warn("Genotype is null for sample " + p.getId() + " not is pedigree!");
// no information , we consider that sample was called AND HOM REF
if (p.isAffected()) {
count_case_miss_alt++;
} else {
count_ctrl_miss_alt++;
}
continue;
}
/* loop over alleles */
final boolean genotype_contains_allele = genotype.getAlleles().stream().anyMatch(A -> A.equals(observed_alt));
/* fisher */
if (genotype_contains_allele) {
if (p.isAffected()) {
count_case_have_alt++;
;
} else {
count_ctrl_have_alt++;
}
} else {
if (p.isAffected()) {
count_case_miss_alt++;
} else {
count_ctrl_miss_alt++;
}
}
}
/* end of loop over persons */
/* fisher test for alleles */
final FisherExactTest fisherAlt = FisherExactTest.compute(count_case_have_alt, count_case_miss_alt, count_ctrl_have_alt, count_ctrl_miss_alt);
fisherValues.add(fisherAlt.getAsDouble());
infoData.add(String.join("|", "ALLELE", String.valueOf(observed_alt.getDisplayString()), "FISHER", String.valueOf(fisherAlt.getAsDouble()), "CASE_HAVE_ALT", String.valueOf(count_case_have_alt), "CASE_MISS_ALT", String.valueOf(count_case_miss_alt), "CTRL_HAVE_ALT", String.valueOf(count_ctrl_have_alt), "CTRL_MISS_ALT", String.valueOf(count_ctrl_miss_alt)));
found_one_alt = true;
final boolean is_in_range = this.min_fisher <= fisherAlt.getAsDouble() && fisherAlt.getAsDouble() <= this.max_fisher;
final int total_ctrls = count_ctrl_have_alt + count_ctrl_miss_alt;
final int total_cases = count_case_have_alt + count_case_miss_alt;
// check ratio case/control
if (total_ctrls > 0 && total_cases > 0 && (count_case_have_alt / (double) total_cases) >= (count_ctrl_have_alt / (double) total_ctrls)) {
set_filter_case_ctrl_ratio = false;
}
if (is_in_range) {
set_filter_in_range = false;
}
if (this.bedExportPath != null && is_in_range) {
report.print(ctx.getContig());
report.print('\t');
report.print(ctx.getStart() - 1);
report.print('\t');
report.print(ctx.getEnd());
report.print('\t');
report.print(ctx.getReference().getDisplayString());
report.print('\t');
report.print(observed_alt.getDisplayString());
report.print('\t');
report.print(fisherAlt.getAsDouble());
report.print('\t');
report.print(count_case_have_alt);
report.print('\t');
report.print(count_case_miss_alt);
report.print('\t');
report.print(count_ctrl_have_alt);
report.print('\t');
report.print(count_ctrl_miss_alt);
report.println();
}
}
// better than nothing, otherwise i'll mess with the filters
if (!found_one_alt) {
w.add(vcb.make());
continue;
}
// soft filter, skip variant
if ((set_filter_in_range && variantFilterHeader == null) || (set_filter_case_ctrl_ratio && filterCtrlgtCaseRatio == null))
// skip variant
continue;
vcb.attribute(fisherAlleleInfoHeader.getID(), fisherValues);
vcb.attribute(fisherDetailInfoHeader.getID(), infoData);
if (this.overwrite_qual) {
final OptionalDouble minV = fisherValues.stream().mapToDouble(V -> V.doubleValue()).min();
if (minV.isPresent())
vcb.log10PError(Math.max(1.0E-100, /* arbitrary */
minV.getAsDouble()) / -10);
}
if (set_filter_in_range && variantFilterHeader != null) {
vcb.filter(variantFilterHeader.getID());
// only set this one if the filter is set above
if (set_filter_case_ctrl_ratio && filterCtrlgtCaseRatio != null) {
vcb.filter(filterCtrlgtCaseRatio.getID());
}
}
w.add(vcb.make());
}
w.close();
report.flush();
report.close();
return 0;
}
Aggregations