Use of htsjdk.variant.variantcontext.Genotype in project jvarkit by lindenb.
The class VcfLiftOver, method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
VariantContextWriter failed = null;
GenomicSequence genomicSequence = null;
try {
final VCFHeader inputHeader = in.getHeader();
final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
if (!(V instanceof VCFInfoHeaderLine))
return true;
final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
if (removeInfo.contains(vih.getID()))
return false;
return true;
}).collect(Collectors.toSet());
if (this.failedFile != null) {
final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
failed = super.openVariantContextWriter(failedFile);
failed.writeHeader(header2);
}
final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
out.writeHeader(header3);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
while (in.hasNext()) {
VariantContext ctx = progress.watch(in.next());
if (!this.removeInfo.isEmpty()) {
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
ctx = vcb.make();
}
if (ctx.isIndel() && this.ignoreIndels) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
continue;
}
if (adaptivematch) {
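// note: Math.min(0, ...) below keeps minAlleleLength at 0 for non-empty alleles, so the ratio passed to setLiftOverMinMatch is 0 (the most permissive setting)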
double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
}
final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(),
false /* negative strand */, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
if (lifted == null) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
} else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
} else {
boolean alleleAreValidatedVsRef = true;
// part of the code was copied from picard/liftovervcf
final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
final List<Allele> alleles = new ArrayList<Allele>();
for (final Allele oldAllele : ctx.getAlleles()) {
final Allele fixedAllele;
if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
alleles.add(oldAllele);
continue;
} else if (lifted.isPositiveStrand()) {
fixedAllele = oldAllele;
alleles.add(oldAllele);
} else {
fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
alleles.add(fixedAllele);
reverseComplementAlleleMap.put(oldAllele, fixedAllele);
}
if (this.checkAlleleSequence) {
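// compare the (possibly reverse-complemented) allele bases against the target reference at the lifted position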
if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
}
final String alleleStr = fixedAllele.getBaseString();
int x = 0;
while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
alleleAreValidatedVsRef = false;
break;
}
++x;
}
if (x != alleleStr.length()) {
alleleAreValidatedVsRef = false;
break;
}
}
}
if (!alleleAreValidatedVsRef) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
continue;
}
if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
vcb.id(ctx.getID());
vcb.attributes(ctx.getAttributes());
vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
vcb.filters(ctx.getFilters());
vcb.log10PError(ctx.getLog10PError());
final GenotypesContext genotypeContext = ctx.getGenotypes();
final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
for (final Genotype genotype : genotypeContext) {
final List<Allele> fixedAlleles = new ArrayList<Allele>();
for (final Allele allele : genotype.getAlleles()) {
final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
fixedAlleles.add(fixedAllele);
}
fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
}
vcb.genotypes(fixedGenotypes);
out.add(vcb.make());
}
}
if (failed != null) {
failed.close();
failed = null;
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(failed);
}
}
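For comparison, here is a minimal, self-contained sketch of the same pattern: lift an interval with htsjdk's LiftOver, reverse-complement the alleles of minus-strand hits, and rebuild every Genotype through GenotypeBuilder so it points at the fixed alleles. The class LiftOverSketch and the method liftVariant are hypothetical names; the sketch deliberately omits the reference-sequence validation, the INFO bookkeeping and the failure writer used above.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

public class LiftOverSketch {
    /** Lift a variant to the target build; returns null when the lift-over itself fails. */
    static VariantContext liftVariant(final LiftOver liftOver, final VariantContext ctx) {
        final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd()));
        if (lifted == null) return null;
        // reverse-complement the alleles when the interval maps to the minus strand
        final Map<Allele, Allele> old2new = new HashMap<>();
        final List<Allele> alleles = new ArrayList<>();
        for (final Allele a : ctx.getAlleles()) {
            if (a.isSymbolic() || a.isNoCall() || a.equals(Allele.SPAN_DEL) || lifted.isPositiveStrand()) {
                alleles.add(a);
            } else {
                final Allele rc = Allele.create(SequenceUtil.reverseComplement(a.getBaseString()), a.isReference());
                alleles.add(rc);
                old2new.put(a, rc);
            }
        }
        // rebuild each Genotype so it references the (possibly reverse-complemented) allele objects
        final GenotypesContext fixed = GenotypesContext.create(ctx.getNSamples());
        for (final Genotype g : ctx.getGenotypes()) {
            final List<Allele> ga = new ArrayList<>();
            for (final Allele a : g.getAlleles()) ga.add(old2new.getOrDefault(a, a));
            fixed.add(new GenotypeBuilder(g).alleles(ga).make());
        }
        return new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles)
                .genotypes(fixed)
                .make();
    }
}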
Use of htsjdk.variant.variantcontext.Genotype in project jvarkit by lindenb.
The class LumpyMoreSamples, method doWork.
@Override
public int doWork(final List<String> args) {
VcfIterator r = null;
VariantContextWriter vcw = null;
final Map<String, SamReader> sample2samreaders = new HashMap<>();
try {
r = super.openVcfIterator(oneFileOrNull(args));
final VCFHeader headerIn = r.getHeader();
final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
return -1;
}
final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
if (F.trim().isEmpty())
return;
final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
final SAMFileHeader samHeader = sr.getFileHeader();
final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
if (dict2 == null) {
throw new JvarkitException.BamDictionaryMissing(F);
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
}
for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
final String sample = rg.getSample();
if (StringUtil.isBlank(sample))
continue;
final SamReader reader = sample2samreaders.get(sample);
if (reader == null) {
sample2samreaders.put(sample, sr);
} else if (reader == sr) {
continue;
} else {
throw new JvarkitException.UserError("more than one sample per bam:" + F);
}
}
});
final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
outVcfSampleNames.addAll(sample2samreaders.keySet());
final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
headerOut.addMetaDataLine(SU2);
headerOut.addMetaDataLine(PE2);
headerOut.addMetaDataLine(SR2);
vcw = super.openVariantContextWriter(this.outputFile);
vcw.writeHeader(headerOut);
while (r.hasNext()) {
final VariantContext ctx = r.next();
final StructuralVariantType sttype = ctx.getStructuralVariantType();
if (sttype == null)
continue;
final int tid = dict.getSequenceIndex(ctx.getContig());
final Map<String, Genotype> genotypeMap = new HashMap<>();
ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
for (final String sample : sample2samreaders.keySet()) {
final SamReader samReader = sample2samreaders.get(sample);
final SupportingReads sr = new SupportingReads();
switch(sttype) {
case DEL:
{
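// one query window per breakpoint (variant start and end), widened by the confidence intervals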
int pos = ctx.getStart();
int[] ci = confidenceIntervalPos(ctx);
final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
int end = ctx.getEnd();
ci = confidenceIntervalEnd(ctx);
final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
final Cigar cigar = rec.getCigar();
if (cigar.isLeftClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
if (qi.overlaps(left)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
if (cigar.isRightClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
if (qi.overlaps(right)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
}
break;
}
default:
break;
}
final GenotypeBuilder gb;
if (genotypeMap.containsKey(sample)) {
gb = new GenotypeBuilder(genotypeMap.get(sample));
} else {
gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
}
gb.attribute(SR2.getID(), sr.splitReads);
gb.attribute(PE2.getID(), sr.pairedReads);
gb.attribute(SU2.getID(), 0);
genotypeMap.put(sample, gb.make());
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// add missing samples.
for (final String sampleName : outVcfSampleNames) {
if (genotypeMap.containsKey(sampleName))
continue;
genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
}
vcb.genotypes(genotypeMap.values());
vcw.add(vcb.make());
}
r.close();
r = null;
sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
}
}
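A minimal sketch of the Genotype-annotation pattern used here: declare a FORMAT header line once, then attach a per-sample value with GenotypeBuilder, falling back to a no-call genotype when the sample is absent from the variant. FormatCountSketch, declare and withSplitReadCount are hypothetical names.

import java.util.Arrays;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;

public class FormatCountSketch {
    // declare the FORMAT field once in the output header...
    static final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine(
            "SR2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");

    static void declare(final VCFHeader header) {
        header.addMetaDataLine(SR2);
    }

    /** ...then attach a value per sample, reusing the existing Genotype when the sample is already called. */
    static Genotype withSplitReadCount(final VariantContext ctx, final String sample, final int splitReads) {
        final Genotype existing = ctx.getGenotype(sample);
        final GenotypeBuilder gb = existing != null
                ? new GenotypeBuilder(existing)
                : new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
        return gb.attribute(SR2.getID(), splitReads).make();
    }
}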
Use of htsjdk.variant.variantcontext.Genotype in project jvarkit by lindenb.
The class AlleleFrequencyCalculator, method doWork.
@Override
public int doWork(final List<String> args) {
PrintStream out = null;
VcfIterator in = null;
try {
in = super.openVcfIterator(oneAndOnlyOneFile(args));
out = openFileOrStdoutAsPrintStream(outputFile);
out.println("CHR\tPOS\tID\tREF\tALT\tTOTAL_CNT\tALT_CNT\tFRQ");
while (in.hasNext() && !out.checkError()) {
final VariantContext ctx = in.next();
final Allele ref = ctx.getReference();
if (ref == null)
continue;
if (ctx.getNSamples() == 0 || ctx.getAlternateAlleles().isEmpty())
continue;
final Allele alt = ctx.getAltAlleleWithHighestAlleleCount();
if (alt == null)
continue;
final GenotypesContext genotypes = ctx.getGenotypes();
if (genotypes == null)
continue;
int total_ctn = 0;
int alt_ctn = 0;
for (int i = 0; i < genotypes.size(); ++i) {
final Genotype g = genotypes.get(i);
for (final Allele allele : g.getAlleles()) {
if (allele.equals(ref)) {
total_ctn++;
} else if (allele.equals(alt)) {
total_ctn++;
alt_ctn++;
}
}
}
out.print(ctx.getContig());
out.print("\t");
out.print(ctx.getStart());
out.print("\t");
out.print(ctx.hasID() ? ctx.getID() : ".");
out.print("\t");
out.print(ref.getBaseString());
out.print("\t");
out.print(alt.getBaseString());
out.print("\t");
out.print(total_ctn);
out.print("\t");
out.print(alt_ctn);
out.print("\t");
out.print(alt_ctn / (float) total_ctn);
out.println();
}
out.flush();
out.close();
out = null;
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
CloserUtil.close(in);
}
}
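The counting loop above reduces to a small helper; this sketch (AltFrequencySketch and frequencyOf are made-up names) follows the same convention and returns NaN instead of dividing by zero when no REF or ALT allele is observed.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;

public class AltFrequencySketch {
    /** Frequency of alt among the REF/alt alleles carried by the genotypes; NaN when none is observed. */
    static double frequencyOf(final VariantContext ctx, final Allele alt) {
        final Allele ref = ctx.getReference();
        int total = 0;
        int altCount = 0;
        for (final Genotype g : ctx.getGenotypes()) {
            for (final Allele a : g.getAlleles()) {
                if (a.equals(ref)) {
                    total++;
                } else if (a.equals(alt)) {
                    total++;
                    altCount++;
                }
                // no-calls and other ALT alleles are ignored, as in the loop above
            }
        }
        return total == 0 ? Double.NaN : altCount / (double) total;
    }
}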
Use of htsjdk.variant.variantcontext.Genotype in project jvarkit by lindenb.
The class VariantAttributesRecalculator, method apply.
public VariantContext apply(final VariantContext ctx) {
if (this.recalculation_is_disabled)
return ctx;
if (!ctx.hasGenotypes())
return ctx;
if (!header_was_set)
throw new IllegalStateException("Vcf header was not set !");
final Predicate<Genotype> gtFilter = G -> recalculation_ignore_gt_filtered || !G.isFiltered();
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (this.DP_flag) {
vcb.attribute(VCFConstants.DEPTH_KEY, ctx.getGenotypes().stream().filter(gtFilter).filter(G -> G.hasDP()).mapToInt(G -> G.getDP()).sum());
}
final List<Integer> acL = new ArrayList<>();
for (final Allele alt : ctx.getAlternateAlleles()) {
final int ac = (int) ctx.getGenotypes().stream().filter(gtFilter).mapToLong(G -> G.getAlleles().stream().filter(A -> A.equals(alt)).count()).sum();
acL.add(ac);
}
final int AN = (int) ctx.getGenotypes().stream().filter(gtFilter).filter(G -> G.isCalled()).mapToLong(G -> G.getAlleles().stream().filter(A -> A.isCalled()).count()).sum();
if (this.AC_flag) {
vcb.rmAttribute(VCFConstants.ALLELE_COUNT_KEY);
if (!acL.isEmpty())
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, acL);
}
if (this.AN_flag) {
vcb.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY);
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, AN);
}
if (this.AF_flag) {
vcb.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
if (AN > 0 && !acL.isEmpty()) {
final List<Double> afL = new ArrayList<>(acL.size());
for (int x = 0; x < acL.size(); ++x) {
afL.add(acL.get(x) / (double) AN);
}
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, afL);
}
}
return vcb.make();
}
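The same AC/AN/AF arithmetic can be written as a compact, self-contained sketch (AcAnAfSketch and recalc are hypothetical names); for brevity it skips the per-genotype FILTER predicate (gtFilter) and the DP recomputation used above.

import java.util.List;
import java.util.stream.Collectors;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.vcf.VCFConstants;

public class AcAnAfSketch {
    /** Rewrite INFO/AN, INFO/AC and INFO/AF from the genotypes of ctx. */
    static VariantContext recalc(final VariantContext ctx) {
        if (!ctx.hasGenotypes()) return ctx;
        // AN: number of called allele copies across all genotypes
        final int an = (int) ctx.getGenotypes().stream()
                .flatMap(g -> g.getAlleles().stream())
                .filter(Allele::isCalled)
                .count();
        // AC: one count per ALT allele, in ALT order
        final List<Integer> ac = ctx.getAlternateAlleles().stream()
                .map(alt -> (int) ctx.getGenotypes().stream()
                        .flatMap(g -> g.getAlleles().stream())
                        .filter(a -> a.equals(alt))
                        .count())
                .collect(Collectors.toList());
        final VariantContextBuilder vcb = new VariantContextBuilder(ctx)
                .rmAttribute(VCFConstants.ALLELE_NUMBER_KEY)
                .rmAttribute(VCFConstants.ALLELE_COUNT_KEY)
                .rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)
                .attribute(VCFConstants.ALLELE_NUMBER_KEY, an);
        if (!ac.isEmpty()) {
            vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, ac);
            if (an > 0) {
                vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY,
                        ac.stream().map(c -> c / (double) an).collect(Collectors.toList()));
            }
        }
        return vcb.make();
    }
}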
Use of htsjdk.variant.variantcontext.Genotype in project jvarkit by lindenb.
The class VcfTools, method isMendelianIncompatibility.
/**
* Returns true if there is a Mendelian incompatibility between the child and its parents.
*/
public boolean isMendelianIncompatibility(final VariantContext ctx, final Pedigree.Person child) {
final Genotype gc = ctx.getGenotype(child.getId());
if (gc == null)
return false;
final Genotype gf = child.hasFather() ? ctx.getGenotype(child.getFatherId()) : null;
final Genotype gm = child.hasMother() ? ctx.getGenotype(child.getMotherId()) : null;
if (gf == null && gm == null)
return false;
return isMendelianIncompatibility(gc, gf, gm);
}
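For illustration, a simplified diploid trio check on plain Genotype objects (TrioCheckSketch and isIncompatibleTrio are hypothetical names; unlike the overload called above, it requires both parental genotypes): the child is flagged when no assignment of its two alleles, one to each parent, is possible.

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;

public class TrioCheckSketch {
    /**
     * Simplified diploid trio check: the child is incompatible when no assignment of its
     * two alleles (one to each parent) is possible. Returns false when a genotype is missing
     * or not called.
     */
    static boolean isIncompatibleTrio(final Genotype child, final Genotype father, final Genotype mother) {
        if (child == null || father == null || mother == null) return false;
        if (!child.isCalled() || !father.isCalled() || !mother.isCalled()) return false;
        if (child.getAlleles().size() != 2) return false; // only plain diploid calls are handled here
        final Allele c0 = child.getAllele(0);
        final Allele c1 = child.getAllele(1);
        final boolean compatible =
                (father.getAlleles().contains(c0) && mother.getAlleles().contains(c1))
             || (father.getAlleles().contains(c1) && mother.getAlleles().contains(c0));
        return !compatible;
    }
}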