use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class GroupByGenotypes method map.
@Override
public Map<Category, Long> map(final RefMetaDataTracker tracker, final ReferenceContext refctx, final AlignmentContext context) {
if (tracker == null)
return Collections.emptyMap();
final Map<Category, Long> counts = new HashMap<>();
for (final VariantContext ctx : tracker.getValues(this.variants, context.getLocation())) {
int index_singleton = -1;
if (onlysingletons) {
for (int i = 0; i < ctx.getNSamples(); ++i) {
final Genotype g = ctx.getGenotype(i);
if (g == null || !g.isCalled() || g.isNoCall() || g.isHomRef())
continue;
if (index_singleton != -1) {
// not anymore a singleton
index_singleton = -1;
break;
}
index_singleton = i;
}
}
for (int i = 0; i < ctx.getNSamples(); ++i) {
if (onlysingletons && index_singleton != i) {
continue;
}
final Genotype genotype = ctx.getGenotype(i);
final List<Object> labels = new ArrayList<>();
labels.add(genotype.getSampleName());
if (bychrom)
labels.add(ctx.getContig());
if (byID)
labels.add(ctx.hasID());
if (byType)
labels.add(ctx.getType().name());
if (byGenotypeType)
labels.add(genotype.getType());
if (byFilter)
labels.add(ctx.isFiltered());
if (byGFilter)
labels.add(genotype.isFiltered());
if (minGenotypeQuality >= 0) {
labels.add(genotype.hasGQ() && genotype.getGQ() >= this.minGenotypeQuality ? "." : "LOWQUAL");
}
if (byImpact) {
AnnPredictionParser.Impact impact = null;
for (final AnnPredictionParser.AnnPrediction pred : super.annParser.getPredictions(ctx)) {
// see http://stackoverflow.com/questions/41678374/
final Predicate<Allele> afilter = new Predicate<Allele>() {
@Override
public boolean test(final Allele A) {
return A.getDisplayString().equals(pred.getAllele());
}
};
if (genotype.getAlleles().stream().filter(afilter).findAny().isPresent() == false)
continue;
final AnnPredictionParser.Impact currImpact = pred.getPutativeImpact();
if (impact != null && currImpact.compareTo(impact) < 0)
continue;
impact = currImpact;
}
if (byImpact)
labels.add(impact == null ? "." : impact.name());
}
final Category cat = new Category(labels);
Long n = counts.get(cat);
counts.put(cat, n == null ? 1L : n + 1);
}
}
return counts;
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class MiniCaller method doWork.
@Override
public int doWork(final List<String> args) {
ConcatSam.ConcatSamIterator iter = null;
try {
if (this.fastaFile == null) {
LOG.error("no REF");
return -1;
}
/* load faid */
final ReferenceGenomeFactory referenceGenomeFactory = new ReferenceGenomeFactory();
this.referenceGenome = referenceGenomeFactory.openFastaFile(this.fastaFile);
this.dictionary = this.referenceGenome.getDictionary();
if (this.dictionary == null) {
LOG.error(JvarkitException.FastaDictionaryMissing.getMessage(this.fastaFile.getPath()));
}
/* create sam record iterator */
iter = new ConcatSam.Factory().addInterval(this.rgnStr).setEnableUnrollList(true).open(args);
final SAMFileHeader samFileheader = iter.getFileHeader();
final SAMSequenceDictionary dict = samFileheader.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.BamDictionaryMissing.getMessage(String.join(", ", args)));
return -1;
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, this.dictionary)) {
LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(dict, this.dictionary));
return -1;
}
final List<SAMReadGroupRecord> groups = samFileheader.getReadGroups();
if (groups == null || groups.isEmpty()) {
LOG.error("No group defined in input");
return -1;
}
final Set<String> sampleSet = groups.stream().map(srgr -> this.samRecordPartition.apply(srgr, samRecordPartition.name())).collect(Collectors.toSet());
/* create VCF metadata */
final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
metaData.add(new VCFFormatHeaderLine("DPG", // one value of each genotype
VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Depth for each allele"));
metaData.add(new VCFFormatHeaderLine("DP4", 4, VCFHeaderLineType.Integer, "Depth ReforAlt|Strand : RF,RR,AF,AR"));
metaData.add(new VCFInfoHeaderLine("INDEL", 1, VCFHeaderLineType.Flag, "Variant is indel"));
// addMetaData(metaData);
final VCFHeader vcfHeader = new VCFHeader(metaData, sampleSet);
vcfHeader.setSequenceDictionary(this.dictionary);
/* create variant context */
this.variantContextWriter = super.openVariantContextWriter(outputFile);
this.variantContextWriter.writeHeader(vcfHeader);
ReferenceContig genomicSeq = null;
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (this.readFilter.filterOut(rec))
continue;
/* flush buffer if needed */
while (!this.buffer.isEmpty() && (this.buffer.get(0).tid < rec.getReferenceIndex() || (this.buffer.get(0).tid == rec.getReferenceIndex() && (this.buffer.get(0).getEnd()) < rec.getAlignmentStart()))) {
this.buffer.remove(0).print();
}
/* get genomic sequence at this position */
if (genomicSeq == null || !genomicSeq.getContig().equals(rec.getContig())) {
genomicSeq = this.referenceGenome.getContig(rec.getContig());
}
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
int readPos = 0;
// 0 based-reference
int refPos0 = rec.getAlignmentStart() - 1;
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
final String sampleName = this.samRecordPartition.getPartion(rec, samRecordPartition.name());
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
switch(op) {
case P:
break;
case H:
break;
case S:
readPos += ce.getLength();
break;
// go
case N:
case D:
{
if (// we need base before deletion
refPos0 > 0) {
char refBase = genomicSeq.charAt(refPos0 - 1);
/* we use base before deletion */
final StringBuilder sb = new StringBuilder(ce.getLength());
sb.append(refBase);
for (int i = 0; i < ce.getLength(); ++i) {
sb.append(genomicSeq.charAt(refPos0 + i));
}
findContext(rec.getReferenceIndex(), // we use base *before deletion */
refPos0 - 1, Allele.create(sb.toString(), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf(refBase), false)).incr(rec.getReadNegativeStrandFlag());
}
refPos0 += ce.getLength();
break;
}
case I:
{
if (refPos0 > 0) {
// float qual=0;
char refBase = Character.toUpperCase(genomicSeq.charAt(refPos0 - 1));
final StringBuilder sb = new StringBuilder(1 + ce.getLength());
sb.append(refBase);
for (int i = 0; i < ce.getLength(); ++i) {
sb.append((char) bases[readPos + i]);
// qual+=(readPos + i < quals.length?quals[ readPos + i]:0);
}
findContext(rec.getReferenceIndex(), // we use base *before deletion */
refPos0 - 1, Allele.create(String.valueOf(refBase), true)).getSample(sampleName).getAllele(Allele.create(sb.toString().toUpperCase(), false)).incr(rec.getReadNegativeStrandFlag());
}
readPos += ce.getLength();
break;
}
case EQ:
case M:
case X:
{
for (int i = 0; i < ce.getLength(); ++i) {
findContext(rec.getReferenceIndex(), refPos0 + i, Allele.create(String.valueOf(genomicSeq.charAt(refPos0 + i)), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf((char) bases[readPos + i]), false)).incr(rec.getReadNegativeStrandFlag());
}
readPos += ce.getLength();
refPos0 += ce.getLength();
break;
}
default:
throw new IllegalStateException("Case statement didn't deal with cigar op: " + op);
}
}
} else {
break;
}
}
while (!buffer.isEmpty()) buffer.remove(0).print();
progress.finish();
iter.close();
iter = null;
this.variantContextWriter.close();
this.variantContextWriter = null;
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(this.referenceGenome);
CloserUtil.close(this.variantContextWriter);
}
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VcfRegistryCGI method doWork.
private void doWork(XMLStreamWriter w, final GroupFile gf) throws XMLStreamException {
Position pos = parsePosition();
if (pos == null)
return;
w.writeStartElement("div");
w.writeStartElement("h2");
w.writeCharacters(pos.chrom + ":" + pos.pos);
w.writeEndElement();
w.writeStartElement("table");
w.writeStartElement("thead");
w.writeStartElement("tr");
for (String header : new String[] { "CHROM", "POS", "ID", "REF", "QUAL", "Sample", "Alleles", "DP", "GQ", "File" }) {
w.writeStartElement("th");
w.writeCharacters(header);
// td
w.writeEndElement();
}
// tr
w.writeEndElement();
// thead
w.writeEndElement();
w.writeStartElement("tbody");
Set<String> samplesWithGenotypes = new HashSet<String>();
Set<String> allSamples = new HashSet<String>();
for (VcfFile f : getVcfFiles(gf)) {
TabixReader tabixReader = null;
TabixReader.Iterator iter = null;
BlockCompressedInputStream bgzin = null;
VCFHeader header = null;
AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
LineIterator lineIterator = null;
for (int i = 0; i < 2; i++) {
try {
if (i == 0) {
bgzin = new BlockCompressedInputStream(f.file);
lineIterator = new LineIteratorImpl(new SynchronousLineReader(bgzin));
header = (VCFHeader) vcfCodec.readActualHeader(lineIterator);
allSamples.addAll(header.getGenotypeSamples());
} else {
tabixReader = new TabixReader(f.file.getPath());
String line;
int[] x = tabixReader.parseReg(pos.chrom + ":" + pos.pos + "-" + (pos.pos));
if (x != null && x.length > 2 && x[0] != -1) {
iter = tabixReader.query(x[0], x[1], x[2]);
} else {
}
while (iter != null && (line = iter.next()) != null) {
VariantContext var = vcfCodec.decode(line);
for (String sample : header.getSampleNamesInOrder()) {
final Genotype genotype = var.getGenotype(sample);
if (genotype == null || !genotype.isCalled())
continue;
if (!genotype.isAvailable())
continue;
samplesWithGenotypes.add(sample);
w.writeStartElement("tr");
w.writeStartElement("td");
w.writeCharacters(var.getContig());
w.writeEndElement();
w.writeStartElement("td");
w.writeCharacters(String.valueOf(var.getStart()));
w.writeEndElement();
if (var.hasID()) {
w.writeStartElement("td");
if (var.getID().matches("rs[0-9]+")) {
w.writeStartElement("a");
w.writeAttribute("href", "http://www.ncbi.nlm.nih.gov/snp/" + var.getID().substring(2));
w.writeCharacters(var.getID());
// a
w.writeEndElement();
} else {
w.writeCharacters(var.getID());
}
// td
w.writeEndElement();
} else {
w.writeEmptyElement("td");
}
if (var.getReference() != null) {
w.writeStartElement("td");
w.writeCharacters(var.getReference().getBaseString());
w.writeEndElement();
} else {
w.writeEmptyElement("td");
}
if (var.hasLog10PError()) {
w.writeStartElement("td");
w.writeCharacters(String.valueOf((int) var.getPhredScaledQual()));
w.writeEndElement();
} else {
w.writeEmptyElement("td");
}
w.writeStartElement("td");
w.writeCharacters(sample);
w.writeEndElement();
List<Allele> alleles = genotype.getAlleles();
w.writeStartElement("td");
w.writeStartElement("span");
if (genotype.isHomRef()) {
w.writeAttribute("style", "color:green;");
} else if (genotype.isHomVar()) {
w.writeAttribute("style", "color:red;");
} else if (genotype.isHet()) {
w.writeAttribute("style", "color:blue;");
}
for (int j = 0; j < alleles.size(); ++j) {
if (j > 0)
w.writeCharacters(genotype.isPhased() ? "|" : "/");
w.writeCharacters(alleles.get(j).getBaseString());
}
// span
w.writeEndElement();
w.writeEndElement();
if (genotype.hasDP()) {
w.writeStartElement("td");
w.writeCharacters(String.valueOf(genotype.getDP()));
w.writeEndElement();
} else {
w.writeEmptyElement("td");
}
if (genotype.hasGQ()) {
w.writeStartElement("td");
w.writeCharacters(String.valueOf(genotype.getGQ()));
w.writeEndElement();
} else {
w.writeEmptyElement("td");
}
w.writeStartElement("td");
w.writeCharacters(f.file.getName());
w.writeEndElement();
// tr
w.writeEndElement();
w.flush();
}
}
}
} catch (Exception err) {
w.writeComment("BOUM " + err);
header = null;
lastException = err;
} finally {
CloserUtil.close(lineIterator);
CloserUtil.close(bgzin);
CloserUtil.close(tabixReader);
CloserUtil.close(iter);
}
if (i == 0 && header == null)
break;
}
w.flush();
}
// tbody
w.writeEndElement();
// table
w.writeEndElement();
allSamples.removeAll(samplesWithGenotypes);
if (!allSamples.isEmpty()) {
w.writeStartElement("h3");
w.writeCharacters("Samples not found");
w.writeEndElement();
w.writeStartElement("ol");
for (String sample : new TreeSet<String>(allSamples)) {
w.writeStartElement("li");
w.writeCharacters(sample);
w.writeEndElement();
}
w.writeEndElement();
}
writeHTMLException(w, this.lastException);
// div
w.writeEndElement();
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class EvsToVcf method doWork.
@Override
public int doWork(List<String> args) {
VariantContextWriter out = null;
try {
if (!args.isEmpty()) {
LOG.error("Illegal number of arguments");
return -1;
}
JAXBContext jc = JAXBContext.newInstance(SnpData.class);
Unmarshaller unmarshaller = jc.createUnmarshaller();
out = VCFUtils.createVariantContextWriterToStdout();
SAMSequenceDictionary dict = new SAMSequenceDictionary();
_fillDict(dict, "1", 249250621);
_fillDict(dict, "2", 243199373);
_fillDict(dict, "3", 198022430);
_fillDict(dict, "4", 191154276);
_fillDict(dict, "5", 180915260);
_fillDict(dict, "6", 171115067);
_fillDict(dict, "7", 159138663);
_fillDict(dict, "8", 146364022);
_fillDict(dict, "9", 141213431);
_fillDict(dict, "10", 135534747);
_fillDict(dict, "11", 135006516);
_fillDict(dict, "12", 133851895);
_fillDict(dict, "13", 115169878);
_fillDict(dict, "14", 107349540);
_fillDict(dict, "15", 102531392);
_fillDict(dict, "16", 90354753);
_fillDict(dict, "17", 81195210);
_fillDict(dict, "18", 78077248);
_fillDict(dict, "19", 59128983);
_fillDict(dict, "20", 63025520);
_fillDict(dict, "21", 48129895);
_fillDict(dict, "22", 51304566);
_fillDict(dict, "X", 155270560);
_fillDict(dict, "Y", 59373566);
_fillDict(dict, "MT", 16569);
VCFHeader header = new VCFHeader();
header.setSequenceDictionary(dict);
header.addMetaDataLine(new VCFInfoHeaderLine("CONS", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScore"));
header.addMetaDataLine(new VCFInfoHeaderLine("GERP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("uaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("aaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("totalMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
header.addMetaDataLine(new VCFInfoHeaderLine("DP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Integer, "conservationScoreGERP"));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
out.writeHeader(header);
Pattern comma = Pattern.compile("[,]");
XMLInputFactory xif = XMLInputFactory.newFactory();
xif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
XMLEventReader xmlr = xif.createXMLEventReader(System.in);
while (xmlr.hasNext() && !System.out.checkError()) {
XMLEvent evt = xmlr.peek();
if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
xmlr.nextEvent();
continue;
}
SnpData snpData = unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
VariantContextBuilder vcb = new VariantContextBuilder();
Set<Allele> alleles = new HashSet<Allele>();
alleles.add(Allele.create(snpData.getRefAllele(), true));
for (String s : comma.split(snpData.getAltAlleles())) {
if (isEmpty(s))
continue;
alleles.add(Allele.create(s, false));
}
vcb.chr(snpData.getChromosome());
vcb.start(snpData.getChrPosition());
vcb.stop(snpData.getChrPosition() + snpData.getRefAllele().length() - 1);
if (!isEmpty(snpData.getRsIds()) && !snpData.getRsIds().equals("none")) {
vcb.id(snpData.getRsIds());
}
vcb.alleles(alleles);
Float d = parseDouble(snpData.getConservationScore());
if (d != null) {
vcb.attribute("CONS", d);
}
d = parseDouble(snpData.getConservationScoreGERP());
if (d != null) {
vcb.attribute("GERP", d);
}
vcb.attribute("uaMAF", (float) snpData.getUaMAF());
vcb.attribute("aaMAF", (float) snpData.getAaMAF());
vcb.attribute("totalMAF", (float) snpData.getTotalMAF());
vcb.attribute("DP", snpData.getAvgSampleReadDepth());
out.add(vcb.make());
}
xmlr.close();
out.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
}
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VcfStage method buildVariantTable.
/**
* build table of variants
*/
private TableView<VariantContext> buildVariantTable() {
final TableView<VariantContext> table = new TableView<>();
table.getColumns().add(makeColumn("CHROM", V -> V.getContig()));
table.getColumns().add(formatIntegerColumn(makeColumn("POS", V -> V.getStart())));
table.getColumns().add(makeColumn("ID", V -> V.hasID() ? V.getID() : null));
table.getColumns().add(makeColumn("REF", V -> allele2stringConverter.apply(V.getReference())));
table.getColumns().add(makeColumn("ALT", V -> V.getAlternateAlleles().stream().map(A -> allele2stringConverter.apply(A)).collect(Collectors.joining(","))));
table.getColumns().add(makeColumn("FILTER", V -> V.getFilters().stream().collect(Collectors.joining(","))));
table.getColumns().add(makeColumn("QUAL", V -> V.hasLog10PError() ? V.getPhredScaledQual() : null));
table.setPlaceholder(new Label("No Variant."));
final ContextMenu ctxMenu = new ContextMenu();
MenuItem menuItem = new MenuItem("dbSNP...");
menuItem.setOnAction(AE -> {
final VariantContext ctx = table.getSelectionModel().getSelectedItem();
if (ctx == null || !ctx.hasID() || !ctx.getID().matches("rs[0-9]+"))
return;
// http://stackoverflow.com/questions/16604341
VcfStage.this.owner.getHostServices().showDocument("https://www.ncbi.nlm.nih.gov/snp/" + ctx.getID().substring(2));
});
ctxMenu.getItems().add(menuItem);
ctxMenu.getItems().addAll(super.buildItemsForContextMenu());
for (final String build : new String[] { "human" }) {
menuItem = new MenuItem("Prediction Ensembl REST [" + build + "]");
menuItem.setOnAction(AE -> {
final VariantContext ctx = table.getSelectionModel().getSelectedItem();
if (ctx == null)
return;
for (final Allele a : ctx.getAlternateAlleles()) {
if (a.isReference())
continue;
if (a.isSymbolic())
continue;
if (a.isNoCall())
continue;
VcfStage.this.owner.getHostServices().showDocument("http://rest.ensembl.org/vep/" + build + "/region/" + JfxNgs.ContigToEnseml.apply(ctx.getContig()) + "%3A" + ctx.getStart() + "-" + ctx.getEnd() + "%3A1%2F" + a.getDisplayString() + "?content-type=text%2Fxml");
}
});
ctxMenu.getItems().add(menuItem);
}
for (final String database : new String[] { "Exac", "gnomAD" }) {
menuItem = new MenuItem("Open Variant (ALT) in " + database + " ... ");
menuItem.setOnAction(AE -> {
final VariantContext ctx = table.getSelectionModel().getSelectedItem();
if (ctx == null)
return;
for (final Allele a : ctx.getAlternateAlleles()) {
if (a.isReference())
continue;
if (a.isSymbolic())
continue;
if (a.isNoCall())
continue;
VcfStage.this.owner.getHostServices().showDocument("http://" + database.toLowerCase() + ".broadinstitute.org/variant/" + JfxNgs.ContigToEnseml.apply(ctx.getContig()) + "-" + ctx.getStart() + "-" + ctx.getReference().getDisplayString() + "-" + a.getDisplayString());
}
});
ctxMenu.getItems().add(menuItem);
}
table.setContextMenu(ctxMenu);
return table;
}
Aggregations