use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VcfLiftOver method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
VariantContextWriter failed = null;
GenomicSequence genomicSequence = null;
try {
final VCFHeader inputHeader = in.getHeader();
final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
if (!(V instanceof VCFInfoHeaderLine))
return true;
final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
if (removeInfo.contains(vih.getID()))
return false;
return true;
}).collect(Collectors.toSet());
if (this.failedFile != null) {
final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
failed = super.openVariantContextWriter(failedFile);
failed.writeHeader(header2);
}
final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
out.writeHeader(header3);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
while (in.hasNext()) {
VariantContext ctx = progress.watch(in.next());
if (!this.removeInfo.isEmpty()) {
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
ctx = vcb.make();
}
if (ctx.isIndel() && this.ignoreIndels) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
continue;
}
if (adaptivematch) {
double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
}
final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
if (lifted == null) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
} else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
} else {
boolean alleleAreValidatedVsRef = true;
// part of the code was copied from picard/liftovervcf
final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
final List<Allele> alleles = new ArrayList<Allele>();
for (final Allele oldAllele : ctx.getAlleles()) {
final Allele fixedAllele;
if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
alleles.add(oldAllele);
continue;
} else if (lifted.isPositiveStrand()) {
fixedAllele = oldAllele;
alleles.add(oldAllele);
} else {
fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
alleles.add(fixedAllele);
reverseComplementAlleleMap.put(oldAllele, fixedAllele);
}
if (this.checkAlleleSequence) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
}
final String alleleStr = fixedAllele.getBaseString();
int x = 0;
while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
alleleAreValidatedVsRef = false;
break;
}
++x;
}
if (x != alleleStr.length()) {
alleleAreValidatedVsRef = false;
break;
}
}
}
if (!alleleAreValidatedVsRef) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
continue;
}
if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
if (failed != null)
failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
vcb.id(ctx.getID());
vcb.attributes(ctx.getAttributes());
vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
vcb.filters(ctx.getFilters());
vcb.log10PError(ctx.getLog10PError());
final GenotypesContext genotypeContext = ctx.getGenotypes();
final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
for (final Genotype genotype : genotypeContext) {
final List<Allele> fixedAlleles = new ArrayList<Allele>();
for (final Allele allele : genotype.getAlleles()) {
final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
fixedAlleles.add(fixedAllele);
}
fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
}
vcb.genotypes(fixedGenotypes);
out.add(vcb.make());
}
}
if (failed != null) {
failed.close();
failed = null;
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(failed);
}
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class AlleleFrequencyCalculator method doWork.
@Override
public int doWork(final List<String> args) {
PrintStream out = null;
VcfIterator in = null;
try {
in = super.openVcfIterator(oneAndOnlyOneFile(args));
out = openFileOrStdoutAsPrintStream(outputFile);
out.println("CHR\tPOS\tID\tREF\tALT\tTOTAL_CNT\tALT_CNT\tFRQ");
while (in.hasNext() && !out.checkError()) {
final VariantContext ctx = in.next();
final Allele ref = ctx.getReference();
if (ref == null)
continue;
if (ctx.getNSamples() == 0 || ctx.getAlternateAlleles().isEmpty())
continue;
final Allele alt = ctx.getAltAlleleWithHighestAlleleCount();
if (alt == null)
continue;
final GenotypesContext genotypes = ctx.getGenotypes();
if (genotypes == null)
continue;
int total_ctn = 0;
int alt_ctn = 0;
for (int i = 0; i < genotypes.size(); ++i) {
final Genotype g = genotypes.get(i);
for (final Allele allele : g.getAlleles()) {
if (allele.equals(ref)) {
total_ctn++;
} else if (allele.equals(alt)) {
total_ctn++;
alt_ctn++;
}
}
}
out.print(ctx.getContig());
out.print("\t");
out.print(ctx.getStart());
out.print("\t");
out.print(ctx.hasID() ? ctx.getID() : ".");
out.print("\t");
out.print(ref.getBaseString());
out.print("\t");
out.print(alt.getBaseString());
out.print("\t");
out.print(total_ctn);
out.print("\t");
out.print(alt_ctn);
out.print("\t");
out.print(alt_ctn / (float) total_ctn);
out.println();
}
out.flush();
out.close();
out = null;
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
CloserUtil.close(in);
}
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VCFUtils method copyAlternateAlleleAnnotations.
/**
* copy InfoHeaderType.ALLELE from ctx from annotationVarian
*/
public static List<Object> copyAlternateAlleleAnnotations(final List<Allele> destAlternateAlleles, final VariantContext annotationVariant, final String infoAttributeName) {
final List<Object> attributes = new ArrayList<>(destAlternateAlleles.size());
final List<Allele> galts = annotationVariant.getAlternateAlleles();
final List<Object> gatts = annotationVariant.getAttributeAsList(infoAttributeName);
for (final Allele a : destAlternateAlleles) {
Object found = null;
// final int idx=gnomadCtx.getAlleleIndex(a);//non idx(REF)==0
final int idx = galts.indexOf(a);
if (idx >= 0) {
if (idx < gatts.size() && gatts.get(idx) != null && !gatts.get(idx).equals(".")) {
found = gatts.get(idx);
}
}
attributes.add(found);
}
return attributes;
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class VariantAttributesRecalculator method apply.
public VariantContext apply(final VariantContext ctx) {
if (this.recalculation_is_disabled)
return ctx;
if (!ctx.hasGenotypes())
return ctx;
if (!header_was_set)
throw new IllegalStateException("Vcf header was not set !");
final Predicate<Genotype> gtFilter = G -> recalculation_ignore_gt_filtered || !G.isFiltered();
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (this.DP_flag) {
vcb.attribute(VCFConstants.DEPTH_KEY, ctx.getGenotypes().stream().filter(gtFilter).filter(G -> G.hasDP()).mapToInt(G -> G.getDP()).sum());
}
final List<Integer> acL = new ArrayList<>();
for (final Allele alt : ctx.getAlternateAlleles()) {
final int ac = (int) ctx.getGenotypes().stream().filter(gtFilter).mapToLong(G -> G.getAlleles().stream().filter(A -> A.equals(alt)).count()).sum();
acL.add(ac);
}
final int AN = (int) ctx.getGenotypes().stream().filter(gtFilter).filter(G -> G.isCalled()).mapToLong(G -> G.getAlleles().stream().filter(A -> A.isCalled()).count()).sum();
if (this.AC_flag) {
vcb.rmAttribute(VCFConstants.ALLELE_COUNT_KEY);
if (!acL.isEmpty())
vcb.attribute(VCFConstants.ALLELE_COUNT_KEY, acL);
}
if (this.AN_flag) {
vcb.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY);
vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY, AN);
}
if (this.AF_flag) {
vcb.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
if (AN > 0 && !acL.isEmpty()) {
final List<Double> afL = new ArrayList<>(acL.size());
for (int x = 0; x < acL.size(); ++x) {
afL.add(acL.get(x) / (double) AN);
}
vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, afL);
}
}
return vcb.make();
}
use of htsjdk.variant.variantcontext.Allele in project jvarkit by lindenb.
the class GenotypeBinding method objectToEntry.
@Override
public void objectToEntry(Genotype g, TupleOutput out) {
out.writeString(g.getSampleName());
/* ============================================= */
if (g.hasDP()) {
out.writeBoolean(true);
out.writeInt(g.getDP());
} else {
out.writeBoolean(false);
}
/* ============================================= */
if (g.hasAD()) {
out.writeBoolean(true);
arrayOfIntToEntry(g.getAD(), out);
} else {
out.writeBoolean(false);
}
/* ============================================= */
if (g.hasGQ()) {
out.writeBoolean(true);
out.writeInt(g.getGQ());
} else {
out.writeBoolean(false);
}
/* ============================================= */
if (g.hasPL()) {
out.writeBoolean(true);
arrayOfIntToEntry(g.getPL(), out);
} else {
out.writeBoolean(false);
}
/* ALLELES ======================================== */
List<Allele> alleles = g.getAlleles();
out.writeInt(alleles.size());
for (Allele a : alleles) {
this.alleleBinding.objectToEntry(a, out);
}
/* ATTRIBUTES ===================================== */
Map<String, Object> xAtts = g.getExtendedAttributes();
out.writeInt(xAtts.size());
for (String attKey : xAtts.keySet()) {
out.writeString(attKey);
super.writeAttribute(out, xAtts.get(attKey));
}
}
Aggregations