use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class RDFVcfWriter method writeHeader.
public void writeHeader(VCFHeader header, URI source) {
if (this.header != null)
throw new RuntimeException("Header was already written");
this.header = header;
this.source = source;
if (this.source == null)
this.source = URI.create("urn:source/id" + (++id_generator));
try {
writeStartDocument();
this.w.writeStartElement(PFX, "Source", NS);
this.w.writeAttribute("rdf", RDF, "about", this.source.toString());
this.w.writeStartElement("dc", "title", DC);
this.w.writeCharacters(this.source.toString());
// dc:title
this.w.writeEndElement();
this.w.writeEndElement();
SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict != null) {
for (SAMSequenceRecord ssr : dict.getSequences()) {
this.w.writeStartElement(PFX, "Chromosome", NS);
this.w.writeAttribute("rdf", RDF, "about", "urn:chromosome/" + ssr.getSequenceName());
this.w.writeStartElement("dc", "title", DC);
this.w.writeCharacters(ssr.getSequenceName());
// dc:title
this.w.writeEndElement();
this.w.writeStartElement(PFX, "length", NS);
datatype("int");
this.w.writeCharacters(String.valueOf(ssr.getSequenceLength()));
// length
this.w.writeEndElement();
this.w.writeStartElement(PFX, "index", NS);
datatype("int");
this.w.writeCharacters(String.valueOf(ssr.getSequenceIndex()));
// length
this.w.writeEndElement();
// Chromosome
this.w.writeEndElement();
}
}
key2infoHandler.put(SnpEffPredictionParser.getDefaultTag(), new SnpEffHandler());
key2infoHandler.put(VepPredictionParser.getDefaultTag(), new VepHandler());
key2infoHandler.put(VCFPredictions.TAG, new MyPredictionHandler());
for (VCFInfoHeaderLine h : header.getInfoHeaderLines()) {
RDFVcfInfoHandler handler = key2infoHandler.get(h.getID());
if (handler == null) {
LOG.info("creating default handler for INFO:" + h.getID());
handler = createDefaultRdfVcfInfoHandlerFor(h);
key2infoHandler.put(handler.getKey(), handler);
}
handler.init(h);
}
for (VCFFilterHeaderLine h : header.getFilterLines()) {
this.w.writeStartElement(PFX, "Filter", NS);
this.w.writeAttribute("rdf", RDF, "about", "urn:filter/" + h.getKey());
this.w.writeStartElement("dc", "title", DC);
this.w.writeCharacters(h.getKey());
// dc:title
this.w.writeEndElement();
this.w.writeStartElement("dc", "description", DC);
this.w.writeCharacters(h.getValue());
// dc:title
this.w.writeEndElement();
// Filter
this.w.writeEndElement();
}
// Sample
for (String sample : header.getSampleNamesInOrder()) {
this.w.writeStartElement(PFX, "Sample", NS);
this.w.writeAttribute("rdf", RDF, "about", "urn:sample/" + sample);
this.w.writeStartElement("dc", "title", DC);
this.w.writeCharacters(sample);
// dc:title
this.w.writeEndElement();
// rdf:RDF
this.w.writeEndElement();
}
} catch (Exception e) {
throw new RuntimeException("close failed", e);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class InfoTreeModel method getCSQCols.
private List<String> getCSQCols(VCFHeader header) {
VCFInfoHeaderLine ihl = header.getInfoHeaderLine("CSQ");
if (ihl == null)
return Collections.emptyList();
String description = ihl.getDescription();
String chunck = " Format:";
int i = description.indexOf(chunck);
if (i == -1) {
LOG.warn("Cannot find " + chunck + " in " + description);
return Collections.emptyList();
}
description = description.substring(i + chunck.length()).replaceAll("[ \'\\.\\(\\)]+", "").trim();
String[] tokens = pipeVep.split(description);
ArrayList<String> L = new ArrayList<String>(tokens.length);
for (String s : tokens) {
if (s.trim().isEmpty())
continue;
L.add(s);
}
return L;
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class InfoTreeModel method getEFFCols.
private List<String> getEFFCols(VCFHeader header) {
VCFInfoHeaderLine ihl = header.getInfoHeaderLine("EFF");
if (ihl == null)
return Collections.emptyList();
String description = ihl.getDescription();
String chunck = "Format:";
int i = description.indexOf(chunck);
if (i == -1) {
LOG.warn("Cannot find " + chunck + " in " + description);
return Collections.emptyList();
}
description = description.substring(i + chunck.length()).replace('(', '|').replaceAll("[ \'\\.)\\[\\]]+", "").trim();
String[] tokens = pipeSnpEff.split(description);
ArrayList<String> L = new ArrayList<String>(tokens.length);
for (String s : tokens) {
if (s.trim().isEmpty())
continue;
L.add(s);
}
return L;
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class Biostar140111 method parseGenotypes.
/**
* parses NCBI GT output
*/
private void parseGenotypes(XMLEventReader r, OutputStream outStream) throws Exception {
final Pattern dnaRegex = Pattern.compile("[ATGCatgc]+");
final QName attInId = new QName("indId");
Set<String> samples = new TreeSet<>();
Set<VCFHeaderLine> metaData = new HashSet<>();
metaData.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
metaData.add(new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Depth"));
metaData.add(new VCFInfoHeaderLine("SnpInfoObserved", 1, VCFHeaderLineType.String, "SnpInfo : Oberved"));
VCFHeader header = null;
VariantContextWriter w = null;
while (r.hasNext()) {
XMLEvent evt = r.peek();
if (evt.isStartElement()) {
StartElement startE = evt.asStartElement();
String localName = startE.getName().getLocalPart();
if (localName.equals("SnpInfo")) {
if (header == null) {
header = new VCFHeader(metaData, samples);
w = VCFUtils.createVariantContextWriterToOutputStream(outStream);
w.writeHeader(header);
}
SnpInfo snpInfo = this.unmarshaller.unmarshal(r, SnpInfo.class).getValue();
if (snpInfo.getSnpLoc().isEmpty()) {
LOG.warning("no snploc for rs" + snpInfo.getRsId());
}
for (SnpLoc snpLoc : snpInfo.getSnpLoc()) {
int chromStart;
try {
chromStart = Integer.parseInt(snpLoc.getStart());
} catch (Exception e) {
LOG.warning("bad start in rs" + snpInfo.getRsId() + " " + snpLoc.getStart());
continue;
}
chromStart++;
String contigAllele = snpLoc.getContigAllele();
if (contigAllele == null || !dnaRegex.matcher(contigAllele).matches()) {
LOG.warning("bad contigAllele in rs" + snpInfo.getRsId() + " " + contigAllele);
continue;
}
if (!"fwd".equals(snpLoc.getRsOrientToChrom())) {
contigAllele = AcidNucleics.reverseComplement(contigAllele);
}
Allele ref = Allele.create(contigAllele, true);
Map<String, Genotype> sample2genotype = new HashMap<>();
for (SsInfo ssinfo : snpInfo.getSsInfo()) {
boolean revcomp = !"fwd".equals(snpLoc.getRsOrientToChrom());
if (!"fwd".equals(ssinfo.getSsOrientToRs()))
revcomp = !revcomp;
for (ByPop byPop : ssinfo.getByPop()) {
for (GTypeByInd gt : byPop.getGTypeByInd()) {
String sample = String.valueOf(gt.getIndId());
if (!samples.contains(sample)) {
LOG.warning("Undefined sample:" + sample);
continue;
}
boolean ok = true;
String[] tokens = gt.getGtype().split("[/]");
if (tokens.length == 1) {
tokens = new String[] { tokens[0], tokens[0] };
} else if (tokens.length != 2) {
LOG.warning("Bad genotypes in sample:" + sample + " " + gt.getGtype());
continue;
}
List<Allele> sampleAlleles = new ArrayList<>(2);
for (int i = 0; i < tokens.length; ++i) {
if (revcomp)
tokens[i] = AcidNucleics.reverseComplement(tokens[i]);
if (!dnaRegex.matcher(tokens[i]).matches()) {
ok = false;
break;
}
sampleAlleles.add(tokens[i].equalsIgnoreCase(contigAllele) ? ref : Allele.create(tokens[i], false));
}
if (!ok)
continue;
GenotypeBuilder gb = new GenotypeBuilder(sample, sampleAlleles);
sample2genotype.put(sample, gb.make());
}
}
}
Set<Allele> alleles = new HashSet<>();
alleles.add(ref);
for (String sample : samples) {
if (!sample2genotype.containsKey(sample)) {
sample2genotype.put(sample, GenotypeBuilder.createMissing(sample, 2));
} else {
alleles.addAll(sample2genotype.get(sample).getAlleles());
}
}
VariantContextBuilder vcb = new VariantContextBuilder("dbsnp", snpLoc.getChrom(), chromStart, chromStart + ref.getBaseString().length() - 1, alleles);
if (snpInfo.getObserved() != null) {
vcb.attribute("SnpInfoObserved", VCFUtils.escapeInfoField(snpInfo.getObserved()));
}
vcb.genotypes(sample2genotype.values());
vcb.id("rs" + snpInfo.getRsId());
w.add(vcb.make());
}
} else if (localName.equals("Individual")) {
if (header != null)
throw new XMLStreamException("Error got " + localName + " after genotypes", evt.getLocation());
Attribute att = startE.getAttributeByName(attInId);
if (att == null)
throw new XMLStreamException("Cannot get " + attInId, evt.getLocation());
samples.add(att.getValue());
// consumme
r.next();
} else {
r.next();
}
} else {
// consumme
r.next();
}
}
if (w == null)
throw new IOException("No Genotype was found");
w.close();
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class CaseControlJfx method doWork.
@Override
public int doWork(final Stage primaryStage, final List<String> args) {
final VariantPartition partition;
Pedigree pedigree = null;
VcfIterator in = null;
try {
switch(this.partitionType) {
case variantType:
partition = new VariantTypePartition();
break;
case chromosome:
partition = new ChromosomePartition();
break;
case autosomes:
partition = new SexualContigPartition();
break;
case qual:
partition = new QualPartition();
break;
case vqslod:
partition = new VQSLODPartition();
break;
case typeFilter:
partition = new TypeAndFilterPartiton();
break;
case distance:
partition = new DisanceToDiagonalPartiton();
break;
case n_alts:
partition = new NAltsPartition();
break;
default:
throw new IllegalStateException(this.partitionType.name());
}
if (args.isEmpty()) {
in = VCFUtils.createVcfIteratorStdin();
primaryStage.setTitle(CaseControlJfx.class.getSimpleName());
} else if (args.size() == 1) {
in = VCFUtils.createVcfIterator(args.get(0));
primaryStage.setTitle(args.get(0));
} else {
LOG.error("Illegal Number of arguments: " + args);
return -1;
}
if (this.pedigreeFile != null) {
pedigree = Pedigree.newParser().parse(this.pedigreeFile);
} else {
pedigree = Pedigree.newParser().parse(in.getHeader());
}
if (this.controlTag != null) {
final VCFInfoHeaderLine infoHeaderLine = in.getHeader().getInfoHeaderLine(this.controlTag);
if (infoHeaderLine == null) {
LOG.error("No such attribute in the VCF header: " + this.controlTag);
return -1;
}
}
int count = 0;
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
while (in.hasNext() && (this.limit_to_N_variants < 0 || count < this.limit_to_N_variants)) {
final VariantContext ctx = progress.watch(in.next());
if (this.ignore_ctx_filtered && ctx.isFiltered())
continue;
++count;
final List<Allele> alternates = ctx.getAlternateAlleles();
for (int alt_idx = 0; alt_idx < alternates.size(); ++alt_idx) {
final Allele alt = alternates.get(alt_idx);
final Double[] mafs = { null, null };
for (int i = 0; i < 2; ++i) {
if (i == 1 && this.controlTag != null) {
if (ctx.hasAttribute(this.controlTag)) {
try {
final List<Double> dvals = ctx.getAttributeAsDoubleList(this.controlTag, Double.NaN);
if (alt_idx < dvals.size() && dvals.get(alt_idx) != null) {
final double d = dvals.get(alt_idx);
if (!Double.isNaN(d) && d >= 0 && d <= 1.0)
mafs[1] = d;
}
} catch (NumberFormatException err) {
}
}
} else {
final MafCalculator mafCalculator = new MafCalculator(alt, ctx.getContig());
mafCalculator.setNoCallIsHomRef(no_call_is_homref);
for (Pedigree.Person person : (i == 0 ? pedigree.getAffected() : pedigree.getUnaffected())) {
if (selectSamples.equals(SelectSamples.males) && !person.isMale())
continue;
if (selectSamples.equals(SelectSamples.females) && !person.isFemale())
continue;
final Genotype genotype = ctx.getGenotype(person.getId());
if (genotype == null)
continue;
if (ignore_gt_filtered && genotype.isFiltered())
continue;
mafCalculator.add(genotype, person.isMale());
}
if (!mafCalculator.isEmpty()) {
mafs[i] = mafCalculator.getMaf();
}
}
}
if (mafs[0] == null || mafs[1] == null)
continue;
final XYChart.Data<Number, Number> data = new XYChart.Data<Number, Number>(mafs[0], mafs[1]);
if (this.add_tooltip && this.outputFile == null) {
data.setExtraValue(ctx.getContig() + ":" + ctx.getStart());
}
partition.add(ctx, pedigree, data);
}
}
progress.finish();
in.close();
in = null;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(in);
}
final NumberAxis xAxis = new NumberAxis(0.0, 1.0, 0.1);
xAxis.setLabel("Cases");
final NumberAxis yAxis = new NumberAxis(0.0, 1.0, 0.1);
yAxis.setLabel("Controls" + (this.controlTag == null ? "" : "[" + this.controlTag + "]"));
final ScatterChart<Number, Number> chart = new ScatterChart<>(xAxis, yAxis);
for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
chart.getData().add(series);
}
String title = "Case/Control";
if (!args.isEmpty()) {
title = args.get(0);
int slash = title.lastIndexOf("/");
if (slash != -1)
title = title.substring(slash + 1);
if (title.endsWith(".vcf.gz"))
title = title.substring(0, title.length() - 7);
if (title.endsWith(".vcf"))
title = title.substring(0, title.length() - 4);
}
if (userTitle != null)
title = userTitle;
chart.setTitle(title);
chart.setAnimated(false);
chart.setLegendSide(this.legendSide);
final VBox root = new VBox();
MenuBar menuBar = new MenuBar();
Menu menu = new Menu("File");
MenuItem item = new MenuItem("Save image as...");
item.setOnAction(AE -> {
doMenuSave(chart);
});
menu.getItems().add(item);
menu.getItems().add(new SeparatorMenuItem());
item = new MenuItem("Quit");
item.setOnAction(AE -> {
Platform.exit();
});
menu.getItems().add(item);
menuBar.getMenus().add(menu);
root.getChildren().add(menuBar);
final BorderPane contentPane = new BorderPane();
contentPane.setCenter(chart);
root.getChildren().add(contentPane);
Rectangle2D screen = Screen.getPrimary().getVisualBounds();
double minw = Math.max(Math.min(screen.getWidth(), screen.getHeight()) - 50, 50);
chart.setPrefSize(minw, minw);
final Scene scene = new Scene(root, minw, minw);
primaryStage.setScene(scene);
if (this.outputFile != null) {
primaryStage.setOnShown(WE -> {
LOG.info("saving as " + this.outputFile);
try {
saveImageAs(chart, this.outputFile);
} catch (IOException err) {
LOG.error(err);
System.exit(-1);
}
Platform.exit();
});
}
primaryStage.show();
if (this.outputFile == null) {
// http://stackoverflow.com/questions/14117867
for (final XYChart.Series<Number, Number> series : partition.getSeries()) {
for (XYChart.Data<Number, Number> d : series.getData()) {
if (dataOpacity >= 0 && dataOpacity < 1.0) {
d.getNode().setStyle(d.getNode().getStyle() + "-fx-opacity:0.3;");
}
if (this.add_tooltip) {
final Tooltip tooltip = new Tooltip();
tooltip.setText(String.format("%s (%f / %f)", String.valueOf(d.getExtraValue()), d.getXValue().doubleValue(), d.getYValue().doubleValue()));
Tooltip.install(d.getNode(), tooltip);
}
}
}
}
return 0;
}
Aggregations