Use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.
The class VcfStats, method doWork:
@Override
public int doWork(final List<String> args) {
    if (this.binSize <= 0) {
        LOG.error("binSize <= 0");
        return -1;
    }
    VariantContextWriter teeOut = null;
    VcfIterator iter = null;
    final Map<String, VariantStats> category2stats = new HashMap<>();
    PrintWriter makefileWriter = null;
    try {
        this.archiveFactory = ArchiveFactory.open(this.outputFile);
        if (this.tee)
            teeOut = super.openVariantContextWriter(null);
        iter = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = iter.getHeader();
        this.sampleNamesInOrder = Collections.unmodifiableList(header.getSampleNamesInOrder());
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict != null && !dict.isEmpty()) {
            this.the_dictionary = dict;
        }
        if (this.kgFile != null) {
            LOG.info("load " + kgFile);
            this.knownGeneTreeMap = KnownGene.loadUriAsIntervalTreeMap(this.kgFile, KG -> (dict == null || dict.getSequence(KG.getContig()) != null));
        } else {
            this.knownGeneTreeMap = null;
        }
        // load the pedigree: explicit PED file if given, otherwise the VCF header, otherwise an empty pedigree
        if (this.pedigreeFile != null) {
            this.pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            Pedigree tmpPed = null;
            try {
                tmpPed = Pedigree.newParser().parse(header);
            } catch (Exception err) {
                tmpPed = Pedigree.createEmptyPedigree();
            }
            this.pedigree = tmpPed;
        }
        // write the Makefile preamble
        makefileWriter = this.archiveFactory.openWriter(this.prefix + "Makefile");
        makefileWriter.println(".PHONY: all all_targets ");
        makefileWriter.println("SCREEN_WIDTH?=2600");
        makefileWriter.println("SCREEN_HEIGHT?=1000");
        makefileWriter.println("ALL_TARGETS=");
        makefileWriter.println("all: all_targets");
        if (teeOut != null)
            teeOut.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        // stream the VCF and update the per-category statistics
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            if (teeOut != null)
                teeOut.add(ctx);
            for (final String category : this.variantToCategoryKeys.apply(ctx)) {
                VariantStats vcstat = category2stats.get(category);
                if (vcstat == null) {
                    vcstat = new VariantStats(category, header);
                    category2stats.put(category, vcstat);
                }
                vcstat.visit(ctx);
            }
        }
        for (final String category : category2stats.keySet()) {
            final VariantStats vcstats = category2stats.get(category);
            vcstats.finish(makefileWriter);
        }
        progress.finish();
        makefileWriter.println("all_targets : ${ALL_TARGETS}");
        makefileWriter.flush();
        makefileWriter.close();
        makefileWriter = null;
        iter.close();
        iter = null;
        this.archiveFactory.close();
        archiveFactory = null;
        if (teeOut != null)
            teeOut.close();
        teeOut = null;
        return 0;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        knownGeneTreeMap = null;
        CloserUtil.close(archiveFactory);
        CloserUtil.close(teeOut);
        CloserUtil.close(iter);
        CloserUtil.close(makefileWriter);
    }
}
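The pedigree-loading logic above (an explicit PED file if given, otherwise the pedigree embedded in the VCF header, otherwise an empty pedigree) can be isolated into a small helper. The sketch below is only illustrative: the class and helper names are hypothetical, the pedigree file is assumed to be a java.io.File, and the exception handling is collapsed into a single fallback, whereas VcfStats only falls back to an empty pedigree when parsing the header fails.

import java.io.File;
import htsjdk.variant.vcf.VCFHeader;
import com.github.lindenb.jvarkit.util.Pedigree;

class PedigreeLoadingSketch {
    /** Load a Pedigree: explicit PED file first, then the VCF header, then an empty pedigree. */
    static Pedigree loadPedigree(final File pedigreeFile, final VCFHeader header) {
        try {
            if (pedigreeFile != null) {
                // an explicit PED file takes precedence
                return Pedigree.newParser().parse(pedigreeFile);
            }
            // otherwise look for a pedigree embedded in the VCF header
            return Pedigree.newParser().parse(header);
        } catch (final Exception err) {
            // nothing usable: fall back to an empty pedigree
            return Pedigree.createEmptyPedigree();
        }
    }
}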
Use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.
The class CaseControlJfx, method doWork:
@Override
public int doWork(final Stage primaryStage, final List<String> args) {
    final VariantPartition partition;
    Pedigree pedigree = null;
    VcfIterator in = null;
    try {
        switch (this.partitionType) {
            case variantType:
                partition = new VariantTypePartition();
                break;
            case chromosome:
                partition = new ChromosomePartition();
                break;
            case autosomes:
                partition = new SexualContigPartition();
                break;
            case qual:
                partition = new QualPartition();
                break;
            case vqslod:
                partition = new VQSLODPartition();
                break;
            case typeFilter:
                partition = new TypeAndFilterPartiton();
                break;
            case distance:
                partition = new DisanceToDiagonalPartiton();
                break;
            case n_alts:
                partition = new NAltsPartition();
                break;
            default:
                throw new IllegalStateException(this.partitionType.name());
        }
        if (args.isEmpty()) {
            in = VCFUtils.createVcfIteratorStdin();
            primaryStage.setTitle(CaseControlJfx.class.getSimpleName());
        } else if (args.size() == 1) {
            in = VCFUtils.createVcfIterator(args.get(0));
            primaryStage.setTitle(args.get(0));
        } else {
            LOG.error("Illegal Number of arguments: " + args);
            return -1;
        }
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(in.getHeader());
        }
        if (this.controlTag != null) {
            final VCFInfoHeaderLine infoHeaderLine = in.getHeader().getInfoHeaderLine(this.controlTag);
            if (infoHeaderLine == null) {
                LOG.error("No such attribute in the VCF header: " + this.controlTag);
                return -1;
            }
        }
        int count = 0;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext() && (this.limit_to_N_variants < 0 || count < this.limit_to_N_variants)) {
            final VariantContext ctx = progress.watch(in.next());
            if (this.ignore_ctx_filtered && ctx.isFiltered())
                continue;
            ++count;
            final List<Allele> alternates = ctx.getAlternateAlleles();
            for (int alt_idx = 0; alt_idx < alternates.size(); ++alt_idx) {
                final Allele alt = alternates.get(alt_idx);
                final Double[] mafs = { null, null };
                for (int i = 0; i < 2; ++i) {
                    if (i == 1 && this.controlTag != null) {
                        if (ctx.hasAttribute(this.controlTag)) {
                            try {
                                final List<Double> dvals = ctx.getAttributeAsDoubleList(this.controlTag, Double.NaN);
                                if (alt_idx < dvals.size() && dvals.get(alt_idx) != null) {
                                    final double d = dvals.get(alt_idx);
                                    if (!Double.isNaN(d) && d >= 0 && d <= 1.0)
                                        mafs[1] = d;
                                }
                            } catch (NumberFormatException err) {
                            }
                        }
                    } else {
                        final MafCalculator mafCalculator = new MafCalculator(alt, ctx.getContig());
                        mafCalculator.setNoCallIsHomRef(no_call_is_homref);
                        for (Pedigree.Person person : (i == 0 ? pedigree.getAffected() : pedigree.getUnaffected())) {
                            if (selectSamples.equals(SelectSamples.males) && !person.isMale())
                                continue;
                            if (selectSamples.equals(SelectSamples.females) && !person.isFemale())
                                continue;
                            final Genotype genotype = ctx.getGenotype(person.getId());
                            if (genotype == null)
                                continue;
                            if (ignore_gt_filtered && genotype.isFiltered())
                                continue;
                            mafCalculator.add(genotype, person.isMale());
                        }
                        if (!mafCalculator.isEmpty()) {
                            mafs[i] = mafCalculator.getMaf();
                        }
                    }
                }
                if (mafs[0] == null || mafs[1] == null)
                    continue;
                final XYChart.Data<Number, Number> data = new XYChart.Data<Number, Number>(mafs[0], mafs[1]);
                if (this.add_tooltip && this.outputFile == null) {
                    data.setExtraValue(ctx.getContig() + ":" + ctx.getStart());
                }
                partition.add(ctx, pedigree, data);
            }
        }
        progress.finish();
        in.close();
        in = null;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
    final NumberAxis xAxis = new NumberAxis(0.0, 1.0, 0.1);
    xAxis.setLabel("Cases");
    final NumberAxis yAxis = new NumberAxis(0.0, 1.0, 0.1);
    yAxis.setLabel("Controls" + (this.controlTag == null ? "" : "[" + this.controlTag + "]"));
    final ScatterChart<Number, Number> chart = new ScatterChart<>(xAxis, yAxis);
    for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
        chart.getData().add(series);
    }
    String title = "Case/Control";
    if (!args.isEmpty()) {
        title = args.get(0);
        int slash = title.lastIndexOf("/");
        if (slash != -1)
            title = title.substring(slash + 1);
        if (title.endsWith(".vcf.gz"))
            title = title.substring(0, title.length() - 7);
        if (title.endsWith(".vcf"))
            title = title.substring(0, title.length() - 4);
    }
    if (userTitle != null)
        title = userTitle;
    chart.setTitle(title);
    chart.setAnimated(false);
    chart.setLegendSide(this.legendSide);
    final VBox root = new VBox();
    MenuBar menuBar = new MenuBar();
    Menu menu = new Menu("File");
    MenuItem item = new MenuItem("Save image as...");
    item.setOnAction(AE -> {
        doMenuSave(chart);
    });
    menu.getItems().add(item);
    menu.getItems().add(new SeparatorMenuItem());
    item = new MenuItem("Quit");
    item.setOnAction(AE -> {
        Platform.exit();
    });
    menu.getItems().add(item);
    menuBar.getMenus().add(menu);
    root.getChildren().add(menuBar);
    final BorderPane contentPane = new BorderPane();
    contentPane.setCenter(chart);
    root.getChildren().add(contentPane);
    Rectangle2D screen = Screen.getPrimary().getVisualBounds();
    double minw = Math.max(Math.min(screen.getWidth(), screen.getHeight()) - 50, 50);
    chart.setPrefSize(minw, minw);
    final Scene scene = new Scene(root, minw, minw);
    primaryStage.setScene(scene);
    if (this.outputFile != null) {
        primaryStage.setOnShown(WE -> {
            LOG.info("saving as " + this.outputFile);
            try {
                saveImageAs(chart, this.outputFile);
            } catch (IOException err) {
                LOG.error(err);
                System.exit(-1);
            }
            Platform.exit();
        });
    }
    primaryStage.show();
    if (this.outputFile == null) {
        // http://stackoverflow.com/questions/14117867
        for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
            for (XYChart.Data<Number, Number> d : series.getData()) {
                if (dataOpacity >= 0 && dataOpacity < 1.0) {
                    d.getNode().setStyle(d.getNode().getStyle() + "-fx-opacity:0.3;");
                }
                if (this.add_tooltip) {
                    final Tooltip tooltip = new Tooltip();
                    tooltip.setText(String.format("%s (%f / %f)", String.valueOf(d.getExtraValue()), d.getXValue().doubleValue(), d.getYValue().doubleValue()));
                    Tooltip.install(d.getNode(), tooltip);
                }
            }
        }
    }
    return 0;
}
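The inner loop above computes, for each ALT allele, one minor-allele frequency for the affected samples and one for the unaffected samples (or reads the control frequency from an INFO field). Below is a minimal sketch of the per-group computation, assuming MafCalculator behaves as in the snippet; its package, the class name MafSketch and the helper name mafForGroup are not given there and are purely illustrative.

import java.util.Collection;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import com.github.lindenb.jvarkit.util.Pedigree;
// the import for MafCalculator is omitted: its package is not shown in the snippet above

class MafSketch {
    /** MAF of 'alt' over one pedigree group (e.g. pedigree.getAffected() or pedigree.getUnaffected()), or null if no genotype was usable. */
    static Double mafForGroup(final VariantContext ctx, final Allele alt,
            final Collection<Pedigree.Person> group, final boolean noCallIsHomRef) {
        final MafCalculator calc = new MafCalculator(alt, ctx.getContig());
        calc.setNoCallIsHomRef(noCallIsHomRef);
        for (final Pedigree.Person person : group) {
            final Genotype genotype = ctx.getGenotype(person.getId());
            if (genotype == null || genotype.isFiltered())
                continue;
            calc.add(genotype, person.isMale());
        }
        return calc.isEmpty() ? null : calc.getMaf();
    }
}

Calling this once with pedigree.getAffected() and once with pedigree.getUnaffected() yields the x (cases) and y (controls) coordinates of one point in the scatter chart.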
Use of com.github.lindenb.jvarkit.util.Pedigree in project jvarkit by lindenb.
The class VcfBurdenGoEnrichment, method doWork:
@Override
public int doWork(final List<String> args) {
    if (StringUtil.isBlank(this.readingGo.goUri)) {
        LOG.error("Undefined GO URI.");
        return -1;
    }
    if (this.geneFile == null || !this.geneFile.exists()) {
        LOG.error("Undefined gene file option.");
        return -1;
    }
    try {
        final GoTree gotree = this.readingGo.createParser().setIgnoreDbXRef(true).parse(this.readingGo.goUri);
        List<GoTree.Term> terms = new ArrayList<>(gotree.getTerms());
        final Map<GoTree.Term, Node> term2node = new HashMap<>();
        // build the node TREE
        while (!terms.isEmpty()) {
            int i = 0;
            while (i < terms.size()) {
                final GoTree.Term t = terms.get(i);
                if (!t.hasRelations()) {
                    term2node.put(t, new Node(t));
                    terms.remove(i);
                } else if (t.getRelations().stream().allMatch(L -> term2node.containsKey(L.getTo()))) {
                    final Node n = new Node(t);
                    n.parents.addAll(t.getRelations().stream().map(L -> term2node.get(L.getTo())).collect(Collectors.toSet()));
                    term2node.put(t, n);
                    terms.remove(i);
                } else {
                    i++;
                }
            }
        }
        terms = null;
        final Set<String> unknownAcn = new HashSet<>();
        final Map<String, Set<Node>> gene2node = new HashMap<>();
        final BufferedReader r = IOUtils.openFileForBufferedReading(this.geneFile);
        String line;
        while ((line = r.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final int t = line.indexOf('\t');
            if (t == -1) {
                r.close();
                LOG.error("tab missing in " + line + " of " + this.geneFile);
                return -1;
            }
            final String gene = line.substring(0, t).trim();
            if (StringUtil.isBlank(gene)) {
                r.close();
                LOG.error("Empty gene in " + line);
                return -1;
            }
            // using getTermByName because synonyms were found in GOA
            final String termAcn = line.substring(t + 1).trim();
            if (unknownAcn.contains(termAcn))
                continue;
            final GoTree.Term term = gotree.getTermByName(termAcn);
            if (term == null && !unknownAcn.contains(termAcn)) {
                unknownAcn.add(termAcn);
                LOG.warning("Unknown GO term in " + line + " of " + this.geneFile + ". It could be obsolete, a synonym, or a GO-specific division. Skipping.");
                continue;
            }
            final Node node = term2node.get(term);
            if (node == null) {
                r.close();
                LOG.error("Unknown node in " + line + " of " + this.geneFile);
                return -1;
            }
            Set<Node> nodes = gene2node.get(gene);
            if (nodes == null) {
                nodes = new HashSet<>();
                gene2node.put(gene, nodes);
            }
            node.numGenes++;
            nodes.add(node);
        }
        // clean up
        unknownAcn.clear();
        r.close();
        final VcfIterator iter = openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = iter.getHeader();
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final Set<Pedigree.Person> persons;
        if (this.pedFile != null) {
            final Pedigree pedigree = Pedigree.newParser().parse(this.pedFile);
            persons = new Pedigree.CaseControlExtractor().extract(header, pedigree);
        } else {
            persons = new Pedigree.CaseControlExtractor().extract(header);
        }
        final Set<Pedigree.Person> affected = persons.stream().filter(P -> P.isAffected()).collect(Collectors.toSet());
        final Set<Pedigree.Person> unaffected = persons.stream().filter(P -> P.isUnaffected()).collect(Collectors.toSet());
        if (affected.isEmpty()) {
            LOG.error("No affected individual");
            return -1;
        }
        if (unaffected.isEmpty()) {
            LOG.error("No unaffected individual");
            return -1;
        }
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final Predicate<Genotype> isWildGenotype = G -> {
            if (G == null)
                return false;
            return G.isHomRef();
        };
        final Predicate<Genotype> isAltGenotype = G -> {
            if (G == null)
                return false;
            return G.isCalled() && !G.isHomRef();
        };
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            if (!this.variantFilter.test(ctx))
                continue;
            final Set<String> genes = new HashSet<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token)) {
                        genes.add(token);
                    }
                }
            }
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token)) {
                    genes.add(token);
                }
            }
            if (genes.isEmpty())
                continue;
            final Set<Node> nodes = genes.stream().filter(G -> gene2node.containsKey(G)).flatMap(G -> gene2node.get(G).stream()).collect(Collectors.toSet());
            if (nodes.isEmpty())
                continue;
            final long unaffected_alt = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
            final long affected_alt = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isAltGenotype).count();
            /* not informative */
            if (unaffected_alt + affected_alt == 0L) {
                continue;
            }
            final long affected_ref = affected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
            final long unaffected_ref = unaffected.stream().map(P -> ctx.getGenotype(P.getId())).filter(G -> this.genotypeFilter.test(ctx, G)).filter(isWildGenotype).count();
            nodes.stream().forEach(N -> N.resetVisitedFlag());
            nodes.stream().forEach(N -> N.visit(unaffected_ref, unaffected_alt, affected_ref, affected_alt));
        }
        iter.close();
        progress.finish();
        LOG.info("Calculating Fisher and dumping... please wait");
        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        pw.println("#go_term\tfisher\tname\tgo_term_depth\tcount_genes_in_this_node" + "\tunaffected_ref_gt" + "\tunaffected_alt_gt" + "\taffected_ref_gt" + "\taffected_alt_gt");
        term2node.values().stream().filter(N -> this.show_never_seeen_term || N.sum() > 0L).sorted((n1, n2) -> Double.compare(n1.fisher(), n2.fisher())).forEach(N -> {
            pw.print(N.goTerm.getAcn());
            pw.print('\t');
            pw.print(N.fisher());
            pw.print('\t');
            pw.print(N.goTerm.getName().replaceAll("[ \',\\-]+", "_"));
            pw.print('\t');
            pw.print(N.goTerm.getMinDepth());
            pw.print('\t');
            pw.print(N.numGenes);
            pw.print('\t');
            pw.print(N.unaffected_ref);
            pw.print('\t');
            pw.print(N.unaffected_alt);
            pw.print('\t');
            pw.print(N.affected_ref);
            pw.print('\t');
            pw.print(N.affected_alt);
            pw.println();
        });
        pw.flush();
        pw.close();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
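The burden test above relies on Pedigree only to split the VCF samples into cases and controls before counting REF and ALT genotype carriers per GO node. Below is a minimal sketch of that case/control extraction and counting; the class and helper names (BurdenPedigreeSketch, extractPersons, countAltCarriers) are hypothetical and the per-genotype genotypeFilter used above is omitted.

import java.util.Set;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import com.github.lindenb.jvarkit.util.Pedigree;

class BurdenPedigreeSketch {
    /** Case/control persons present in the VCF header, optionally constrained by an explicit Pedigree. */
    static Set<Pedigree.Person> extractPersons(final VCFHeader header, final Pedigree pedigreeOrNull) {
        return pedigreeOrNull == null
                ? new Pedigree.CaseControlExtractor().extract(header)
                : new Pedigree.CaseControlExtractor().extract(header, pedigreeOrNull);
    }

    /** Number of persons in 'group' whose called genotype carries at least one non-REF allele at this variant. */
    static long countAltCarriers(final VariantContext ctx, final Set<Pedigree.Person> group) {
        return group.stream()
                .map(p -> ctx.getGenotype(p.getId()))
                .filter(g -> g != null && g.isCalled() && !g.isHomRef())
                .count();
    }
}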