Use of htsjdk.variant.vcf.VCFIterator in the project jvarkit by lindenb.
The method doVcfToVcf of the class SVPredictions.
/**
 * Annotates structural variants with the genes they overlap.
 * Genes are read from a GTF file into an interval map; each variant is then
 * classified either as intergenic (reporting the nearest gene on each side)
 * or by where it falls within the overlapping genes. The classification is
 * written to INFO (this.info_tag); with --filter, non-matching variants are
 * FILTERed instead of being dropped.
 *
 * @param inputName name of the VCF input (not used here)
 * @param r iterator over the input variants
 * @param w writer receiving the annotated variants
 * @return 0 on success, -1 on error
 */
@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
try {
final VCFHeader header = r.getHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
// load every gene from the GTF into this.all_gene, converting contig names
// to the VCF dictionary's naming scheme when a dictionary is present
try (final GtfReader gtfReader = new GtfReader(this.gtfPath)) {
if (dict != null)
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
gtfReader.getAllGenes().stream().forEach(G -> this.all_gene.put(new Interval(G), G));
}
final VCFHeader h2 = new VCFHeader(header);
/* split 'limit' string */
if (StringUtils.isBlank(this.whereStr)) {
// add all
Arrays.stream(WhereInGene.values()).forEach(C -> this.limitWhere.add(C));
} else {
for (final String ss : this.whereStr.split("[ \t,;]+")) {
if (StringUtils.isBlank(ss))
continue;
// gene and transcript expand to everything but intergenic
if (ss.toLowerCase().equals("gene") || ss.toLowerCase().equals("transcript")) {
Arrays.stream(WhereInGene.values()).filter(C -> !C.equals(WhereInGene.intergenic)).forEach(C -> this.limitWhere.add(C));
} else {
// any other token must match a WhereInGene enum name (case-insensitive)
final WhereInGene g = Arrays.stream(WhereInGene.values()).filter(A -> A.name().equalsIgnoreCase(ss)).findFirst().orElseThrow(() -> new IllegalArgumentException("Bad identifier expected one of :" + Arrays.stream(WhereInGene.values()).map(X -> X.name()).collect(Collectors.joining(", "))));
this.limitWhere.add(g);
}
}
if (this.limitWhere.isEmpty()) {
LOG.error("--where option provided but no identifier was found.");
return -1;
}
}
// optional FILTER: when set, non-matching variants are flagged rather than discarded
final VCFFilterHeaderLine filterHeader;
if (!StringUtils.isBlank(this.filterStr)) {
filterHeader = new VCFFilterHeaderLine(this.filterStr, "variant failing locations: " + this.limitWhere.stream().map(V -> V.name()).collect(Collectors.joining(",")));
h2.addMetaDataLine(filterHeader);
} else {
filterHeader = null;
}
h2.addMetaDataLine(new VCFInfoHeaderLine(this.info_tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Structural variant consequence."));
JVarkitVersion.getInstance().addMetaData(this, h2);
w.writeHeader(h2);
while (r.hasNext()) {
final VariantContext ctx = r.next();
// for BND records the END may point to the mate's contig; optionally
// treat the breakend as a single position (ignore_bnd_end)
final int ctx_bnd_end;
if (this.ignore_bnd_end && ctx.hasAttribute(VCFConstants.SVTYPE) && ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND")) {
ctx_bnd_end = ctx.getStart();
} else {
ctx_bnd_end = ctx.getEnd();
}
// genes overlapping the variant extended by upstream_size on both sides
final Collection<Gene> genes = this.all_gene.getOverlapping(new Interval(ctx.getContig(), Math.max(1, ctx.getStart() - this.upstream_size), ctx_bnd_end + this.upstream_size));
if (// intergenic
genes.isEmpty()) {
// discard anyway
if (!this.limitWhere.contains(WhereInGene.intergenic) && filterHeader == null)
continue;
// nearest gene on the left = the one with the greatest end before the variant
Gene leftGene = null;
for (final Gene g : this.all_gene.getOverlapping(new Interval(ctx.getContig(), 1, ctx.getStart()))) {
if (leftGene == null || leftGene.getEnd() < g.getEnd())
leftGene = g;
}
final String leftId = (leftGene == null ? "" : leftGene.getId());
final String leftName = (leftGene == null ? "" : leftGene.getGeneName());
// nearest gene on the right = the one with the smallest start after the variant
Gene rightGene = null;
for (final Gene g : this.all_gene.getOverlapping(new Interval(ctx.getContig(), ctx.getStart(), Integer.MAX_VALUE))) {
if (rightGene == null || rightGene.getStart() > g.getStart())
rightGene = g;
}
final String rightId = (rightGene == null ? "" : rightGene.getId());
final String rightName = (rightGene == null ? "" : rightGene.getGeneName());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// FILTER
if (!this.limitWhere.contains(WhereInGene.intergenic) && filterHeader != null) {
vcb.filter(filterHeader.getID());
}
if (!(leftGene == null && rightGene == null)) {
vcb.attribute(this.info_tag, "intergenic|" + leftId + "-" + rightId + "|" + leftName + "-" + rightName);
}
w.add(vcb.make());
} else {
// genic: annotate against every overlapping gene
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
final List<List<Annotation>> annotations = genes.stream().map(G -> annotGene(G, ctx)).collect(Collectors.toList());
// does any annotation location match the user's --where selection?
final boolean match_user_filter = annotations.stream().flatMap(L -> L.stream()).flatMap(A -> A.where.stream()).anyMatch(A -> this.limitWhere.contains(A));
if (!match_user_filter) {
if (filterHeader == null)
continue;
vcb.filter(filterHeader.getID());
}
if (this.max_genes_count != -1 && genes.size() > this.max_genes_count) {
// too many genes: emit a compact summary instead of per-gene annotations
final String prefix = annotations.stream().flatMap(L -> L.stream()).flatMap(A -> A.where.stream()).map(A -> A.name()).collect(Collectors.toCollection(TreeSet::new)).stream().collect(Collectors.joining("&"));
vcb.attribute(this.info_tag, (StringUtils.isBlank(prefix) ? "." : prefix) + "|multiple_genes|" + genes.size());
} else {
// one INFO entry per distinct annotation ("where1&where2…" + label)
final List<String> annots = annotations.stream().flatMap(L -> L.stream()).map(A -> A.where.stream().map(X -> X.name()).collect(Collectors.toCollection(TreeSet::new)).stream().collect(Collectors.joining("&")) + A.label).collect(Collectors.toSet()).stream().collect(Collectors.toList());
if (!annots.isEmpty())
vcb.attribute(this.info_tag, annots);
}
w.add(vcb.make());
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(w);
}
}
Use of htsjdk.variant.vcf.VCFIterator in the project jvarkit by lindenb.
The method doVcfToVcf of the class VCFPredictions.
/**
 * Lightweight functional predictor: annotates each ALT allele of each variant
 * with its predicted consequence on the transcripts loaded from a GTF
 * (intergenic / up-downstream / UTR / intron / splice / coding change).
 * The annotation tag and format depend on this.outputSyntax
 * (native TAG, VEP-style CSQ, or SnpEff-style ANN).
 *
 * @param inputName name of the VCF input (not used here)
 * @param r iterator over the input variants
 * @param w writer receiving the annotated variants
 * @return 0 on success, -1 on error
 */
@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
try {
this.referenceGenome = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.faidxPath);
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceGenome);
final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(dict);
loadGtf(dict);
final VCFHeader header = r.getHeader();
final VCFHeader h2 = new VCFHeader(header);
JVarkitVersion.getInstance().addMetaData(this, h2);
// declare the annotation INFO line matching the chosen output syntax
switch(this.outputSyntax) {
case Vep:
{
h2.addMetaDataLine(new VCFInfoHeaderLine("CSQ", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Consequence type as predicted by VEP" + ". Format: Allele|Feature|Feature_type|Consequence|CDS_position|Protein_position|Amino_acids|Codons"));
break;
}
case SnpEff:
{
h2.addMetaDataLine(new VCFInfoHeaderLine("ANN", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"));
break;
}
default:
{
// native format: pipe-separated FORMAT1 field names
final StringBuilder format = new StringBuilder();
for (FORMAT1 f : FORMAT1.values()) {
if (format.length() > 0)
format.append("|");
format.append(f.name());
}
h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Prediction from " + getClass().getSimpleName() + ". Format: " + format));
break;
}
}
w.writeHeader(h2);
final RNASequenceFactory rnaSeqFactory = new RNASequenceFactory();
rnaSeqFactory.setContigToGenomicSequence(S -> getGenomicSequence(S));
while (r.hasNext()) {
final VariantContext ctx = r.next();
final String normalizedContig = contigNameConverter.apply(ctx.getContig());
// contig unknown to the reference: pass the variant through unchanged
if (StringUtil.isBlank(normalizedContig)) {
w.add(ctx);
continue;
}
final List<Transcript> transcripts = this.transcriptTreeMap.getOverlapping(new SimpleInterval(normalizedContig, ctx.getStart(), ctx.getEnd())).stream().flatMap(L -> L.stream()).collect(Collectors.toList());
final List<Annotation> all_annotations = new ArrayList<>();
final List<Allele> alternateAlleles;
if (ctx.getNAlleles() <= 1) {
// not a variant, just REF
alternateAlleles = Arrays.asList(Allele.NO_CALL);
} else {
alternateAlleles = ctx.getAlternateAlleles();
}
for (final Allele altAllele : alternateAlleles) {
// skip non-informative alleles (REF, *, <NON_REF>)
if (altAllele.isReference() || altAllele.equals(Allele.SPAN_DEL) || altAllele.equals(Allele.NON_REF_ALLELE))
continue;
/* intergenic ====================================================== */
if (transcripts.isEmpty()) {
// nearest transcript upstream (greatest end before the variant)
Transcript leftGene = null;
String leftId = "";
String leftName = "";
for (Iterator<Transcript> iter = this.transcriptTreeMap.getOverlapping(new SimpleInterval(normalizedContig, 1, ctx.getStart())).stream().flatMap(L -> L.stream()).iterator(); iter.hasNext(); ) {
final Transcript t = iter.next();
if (leftGene == null || leftGene.getEnd() < t.getEnd()) {
leftGene = t;
leftId = t.getGene().getId();
leftName = t.getGene().getGeneName();
}
}
// nearest transcript downstream (smallest start after the variant)
// NOTE(review): dict.getSequence(normalizedContig) is assumed non-null here
// because normalizedContig was produced by this dictionary's converter — confirm.
Transcript rightGene = null;
String rightId = "";
String rightName = "";
for (Iterator<Transcript> iter = this.transcriptTreeMap.getOverlapping(new SimpleInterval(normalizedContig, ctx.getEnd(), dict.getSequence(normalizedContig).getSequenceLength())).stream().flatMap(L -> L.stream()).iterator(); iter.hasNext(); ) {
final Transcript t = iter.next();
if (rightGene == null || t.getStart() < rightGene.getStart()) {
rightGene = t;
rightId = t.getGene().getId();
rightName = t.getGene().getGeneName();
}
}
// intergenic
final Annotation annot = new Annotation(altAllele);
annot.seqont.add("intergenic");
annot.geneId = leftId + "-" + rightId;
annot.geneName = leftName + "-" + rightName;
all_annotations.add(annot);
} else {
for (final Transcript transcript : transcripts) {
final Annotation annotation = new Annotation(altAllele, transcript);
all_annotations.add(annotation);
// variant near but not on the transcript: classify up/downstream by strand
if (!transcript.overlaps(ctx)) {
if (((ctx.getEnd() < transcript.getStart() && transcript.isNegativeStrand()) || (ctx.getStart() > transcript.getEnd() && transcript.isPositiveStrand()))) {
if (ctx.withinDistanceOf(transcript, 500)) {
annotation.seqont.add("500B_downstream_variant");
} else if (ctx.withinDistanceOf(transcript, 2_000)) {
annotation.seqont.add("2KB_downstream_variant");
}
} else if (((ctx.getEnd() < transcript.getStart() && transcript.isPositiveStrand()) || (ctx.getStart() > transcript.getEnd() && transcript.isNegativeStrand()))) {
if (ctx.withinDistanceOf(transcript, 2_000)) {
annotation.seqont.add("2KB_upstream_variant");
} else if (ctx.withinDistanceOf(transcript, 5_000)) {
annotation.seqont.add("5KB_upstream_variant");
}
}
continue;
}
// variant fully contains the transcript
if (CoordMath.encloses(ctx.getStart(), ctx.getEnd(), transcript.getStart(), transcript.getEnd())) {
// TODO can be inversion ,etc...
annotation.seqont.add("transcript_ablation");
continue;
}
// UTR overlap: side 0 = 5' UTR, side 1 = 3' UTR
for (int side = 0; side < 2; ++side) {
final Optional<UTR> opt_utr = (side == 0 ? transcript.getTranscriptUTR5() : transcript.getTranscriptUTR3());
if (!opt_utr.isPresent())
continue;
final UTR utr = opt_utr.get();
if (CoordMath.overlaps(utr.getStart(), utr.getEnd(), ctx.getStart(), ctx.getEnd())) {
annotation.seqont.add(side == 0 ? "5_prime_UTR_variant" : "3_prime_UTR_variant");
}
}
// exon/intron overlap: side 0 = exon, side 1 = intron
for (int side = 0; side < 2; ++side) {
final Optional<? extends ExonOrIntron> opt_ex;
if (side == 0) {
opt_ex = transcript.getExons().stream().filter(E -> E.overlaps(ctx)).findFirst();
} else {
opt_ex = transcript.getIntrons().stream().filter(E -> E.overlaps(ctx)).findFirst();
}
if (!opt_ex.isPresent())
continue;
final ExonOrIntron ei = opt_ex.get();
if (side == 0) {
if (transcript.isNonCoding())
annotation.seqont.add("non_coding_transcript_exon_variant");
} else {
if (transcript.isNonCoding())
annotation.seqont.add("non_coding_transcript_intron_variant");
annotation.seqont.add("intron");
}
// NOTE(review): splice sites are only tested for single-base variants
// (start==end); multi-base variants spanning a splice site are not flagged — confirm intended.
if (ctx.getStart() == ctx.getEnd() && ei.isSplicing(ctx.getStart())) {
if (ei.isSplicingAcceptor(ctx.getStart())) {
// SPLICING_ACCEPTOR
annotation.seqont.add("splice_acceptor");
} else if (ei.isSplicingDonor(ctx.getStart())) {
// SPLICING_DONOR
annotation.seqont.add("splice_donor");
} else // ??
{
annotation.seqont.add("splicing_variant");
}
}
}
// structural variants get no codon-level annotation
final StructuralVariantType svType = ctx.getStructuralVariantType();
if (svType != null) {
continue;
}
if (transcript.isNonCoding()) {
// TODO
annotation.seqont.add("non_coding_transcript_variant");
continue;
}
// coding change: fetch (and cache) the coding RNA for this transcript
RNASequence cDNA = this.transcriptId2cdna.get(transcript.getId());
if (cDNA == null) {
cDNA = rnaSeqFactory.getCodingRNA(transcript);
this.transcriptId2cdna.put(transcript.getId(), cDNA);
}
// map the genomic position (0-based) into cDNA coordinates
final OptionalInt opt_pos_cdna0 = cDNA.convertGenomic0ToRnaIndex0(ctx.getStart() - 1);
if (!opt_pos_cdna0.isPresent())
continue;
final int pos_cdna0 = opt_pos_cdna0.getAsInt();
final int pos_aa = pos_cdna0 / 3;
final GeneticCode geneticCode = GeneticCode.getStandard();
if (AcidNucleics.isATGC(altAllele.getBaseString())) {
String bases = altAllele.getBaseString().toUpperCase();
if (transcript.isNegativeStrand()) {
bases = AcidNucleics.reverseComplement(bases);
}
// NOTE(review): the mutation length is the genomic REF length applied at a
// cDNA coordinate; verify this is correct for negative-strand transcripts.
final MutedSequence mutRNA = new MutedSequence(cDNA, pos_cdna0, ctx.getReference().length(), bases);
final PeptideSequence<CharSequence> wildProt = PeptideSequence.of(cDNA, geneticCode);
final PeptideSequence<CharSequence> mutProt = PeptideSequence.of(mutRNA, geneticCode);
// codon-aligned offset of the variant within its codon
final int mod = pos_cdna0 % 3;
annotation.wildCodon = ("" + cDNA.charAt(pos_cdna0 - mod + 0) + cDNA.charAt(pos_cdna0 - mod + 1) + cDNA.charAt(pos_cdna0 - mod + 2));
annotation.mutCodon = ("" + mutRNA.charAt(pos_cdna0 - mod + 0) + mutRNA.charAt(pos_cdna0 - mod + 1) + mutRNA.charAt(pos_cdna0 - mod + 2));
annotation.position_protein = (pos_aa + 1);
annotation.wildAA = String.valueOf(wildProt.charAt(pos_aa));
annotation.mutAA = (String.valueOf(mutProt.charAt(pos_aa)));
// classify the amino-acid change
if (isStop(wildProt.charAt(pos_aa)) && !isStop(mutProt.charAt(pos_aa))) {
annotation.seqont.add("stop_lost");
} else if (!isStop(wildProt.charAt(pos_aa)) && isStop(mutProt.charAt(pos_aa))) {
annotation.seqont.add("stop_gained");
} else if (wildProt.charAt(pos_aa) == mutProt.charAt(pos_aa)) {
annotation.seqont.add("synonymous");
} else {
annotation.seqont.add("missense_variant");
}
}
}
}
}
// de-duplicate annotation strings before writing
final Set<String> info = new HashSet<String>(all_annotations.size());
for (final Annotation a : all_annotations) {
info.add(a.toString());
}
final VariantContextBuilder vb = new VariantContextBuilder(ctx);
final String thetag;
switch(this.outputSyntax) {
case Vep:
thetag = "CSQ";
break;
case SnpEff:
thetag = "ANN";
break;
default:
thetag = TAG;
break;
}
vb.attribute(thetag, info.toArray());
w.add(vb.make());
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(w);
CloserUtil.close(r);
CloserUtil.close(this.referenceGenome);
}
}
Use of htsjdk.variant.vcf.VCFIterator in the project jvarkit by lindenb.
The method doVcfToVcf of the class VCFBedSetFilter.
/**
 * Filters variants against a BED file (tabix-indexed or in-memory interval tree).
 * In black-list mode a variant overlapping any BED record is flagged; with
 * tabixWhiteFile the flag is inverted (keep overlapping variants). Flagged
 * variants are FILTERed when a filter name was given, otherwise discarded.
 *
 * @param inputName name of the VCF input (not used here)
 * @param r iterator over the input variants
 * @param w writer receiving the surviving/flagged variants
 * @return 0 on success
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator r, final VariantContextWriter w) {
    // contigs that could not be mapped to the BED naming scheme (warned once each, capped at 100)
    final Set<String> unresolvedContigs = new HashSet<>();
    final VCFHeader outHeader = new VCFHeader(r.getHeader());
    final VCFFilterHeaderLine filterLine;
    if (StringUtil.isBlank(this.filterName)) {
        filterLine = null;
    } else {
        filterLine = new VCFFilterHeaderLine(this.filterName, "Variant " + (this.tabixBlackFile != null ? " overlapping any bed record of " : " that do not overlap any bed record of ") + tabixFile);
        outHeader.addMetaDataLine(filterLine);
    }
    JVarkitVersion.getInstance().addMetaData(this, outHeader);
    // contig-name conversion source depends on which BED backend is in use
    final ContigNameConverter nameConverter = this.bedReader != null
            ? ContigNameConverter.fromContigSet(this.bedReader.getContigs())
            : ContigNameConverter.fromIntervalTreeMap(this.intervalTreeMap);
    final SAMSequenceDictionary dict = r.getHeader().getSequenceDictionary();
    w.writeHeader(outHeader);
    while (r.hasNext()) {
        final VariantContext ctx = r.next();
        boolean flagged = false;
        final String bedContig = nameConverter.apply(ctx.getContig());
        // extend the variant interval by extend_bases, clamped to [1, contig length]
        final int extendedStart = Math.max(1, ctx.getStart() - this.extend_bases);
        final int extendedEnd;
        if (dict == null) {
            extendedEnd = ctx.getEnd() + this.extend_bases;
        } else {
            final SAMSequenceRecord rec = dict.getSequence(ctx.getContig());
            if (rec == null)
                throw new JvarkitException.ContigNotFoundInDictionary(ctx.getContig(), dict);
            extendedEnd = Math.min(rec.getSequenceLength(), ctx.getEnd() + this.extend_bases);
        }
        if (StringUtil.isBlank(bedContig)) {
            // contig absent from the BED: never flag, warn a bounded number of times
            if (debug)
                LOG.warn("Cannot convert contig " + ctx.getContig());
            if (unresolvedContigs.size() < 100 && unresolvedContigs.add(ctx.getContig())) {
                LOG.warn("Cannot convert variant contig " + ctx.getContig() + " to bed file. (Contig is not in BED file)");
            }
            flagged = false;
        } else if (this.intervalTreeMap != null) {
            // in-memory interval tree backend
            final boolean hit = this.intervalTreeMap
                    .getOverlapping(new SimpleInterval(bedContig, extendedStart, extendedEnd))
                    .stream()
                    .anyMatch(LOC -> testOverlap(extendedStart, extendedEnd, LOC));
            if (hit) {
                if (debug)
                    LOG.warn("treemap.overlap=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
                flagged = true;
            }
        } else {
            // tabix-indexed BED backend (query is 0-based half-open, hence -1/+1)
            try (CloseableIterator<BedLine> bedIter = this.bedReader.iterator(bedContig, extendedStart - 1, extendedEnd + 1)) {
                while (bedIter.hasNext()) {
                    final BedLine bed = bedIter.next();
                    if (bed == null || !testOverlap(extendedStart, extendedEnd, bed))
                        continue;
                    if (debug)
                        LOG.warn("tabix=true set Filter=true for " + ctx.getContig() + ":" + ctx.getStart());
                    flagged = true;
                    break;
                }
            } catch (final IOException err) {
                throw new RuntimeIOException(err);
            }
        }
        /* variant is in whitelist, we want to keep it */
        if (this.tabixWhiteFile != null) {
            flagged = !flagged;
            if (debug)
                LOG.warn("inverse. Now Filter=" + flagged + " for " + ctx.getContig() + ":" + ctx.getStart());
        }
        if (!flagged) {
            if (debug)
                LOG.warn("no filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
            w.add(ctx);
        } else if (filterLine != null) {
            if (debug)
                LOG.warn("adding filter. Writing " + ctx.getContig() + ":" + ctx.getStart());
            w.add(new VariantContextBuilder(ctx).filter(filterLine.getID()).make());
        } else {
            if (debug)
                LOG.warn("Ignoring " + ctx.getContig() + ":" + ctx.getStart());
        }
    }
    if (!unresolvedContigs.isEmpty()) {
        LOG.warn("The following contigs were not found: " + String.join(" ", unresolvedContigs) + "...");
    }
    return 0;
}
Use of htsjdk.variant.vcf.VCFIterator in the project jvarkit by lindenb.
The method doWork of the class VcfGapFrequent.
/**
 * Scans a VCF against one or more allele-frequency databases and reports,
 * per sample (plus one global handler), the regions matching the frequency
 * threshold. Output goes to this.outputFile (or stdout).
 *
 * Fix: the ProgressFactory watcher was only closed on the success path;
 * if a handler threw during iteration it leaked. It is now managed by
 * try-with-resources.
 *
 * @param args command-line arguments; at most one input VCF path
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    // validate the AF threshold before opening anything
    if (af_treshold < 0 || this.af_treshold > 1) {
        LOG.error("bad treshold " + this.af_treshold);
        return -1;
    }
    VCFIterator in = null;
    try {
        // load every frequency database given on the command line
        this.freqentDBs.addAll(IOUtils.unrollFiles2018(this.databases).stream().map(F -> new FreqentDB(F)).collect(Collectors.toList()));
        if (this.freqentDBs.isEmpty()) {
            LOG.error("no database was defined");
            return -1;
        }
        // at least one database must expose a usable AF extractor
        if (this.freqentDBs.stream().flatMap(DB -> DB.afExtractors.stream()).count() == 0L) {
            LOG.error("No AF extractor is suitable for any databases.");
            return -1;
        }
        in = super.openVCFIterator(oneFileOrNull(args));
        final VCFHeader header = in.getHeader();
        this.dict = header.getSequenceDictionary();
        if (this.dict == null || this.dict.isEmpty()) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input"));
            return -1;
        }
        this.out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // one handler for the whole VCF (null sample) plus one per genotyped sample
        final List<Handler> handlers = new ArrayList<>(header.getNGenotypeSamples() + 1);
        handlers.add(new Handler(null));
        header.getSampleNamesInOrder().stream().map(S -> new Handler(S)).forEach(H -> handlers.add(H));
        // FIX: close the progress watcher even when a handler throws mid-iteration
        try (ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header).validatingSortOrder(true).logger(LOG).build()) {
            while (in.hasNext()) {
                final VariantContext ctx = progress.apply(in.next());
                handlers.stream().forEach(H -> H.visit(ctx));
            }
        }
        // flush per-handler results, then close the output; null-out so the
        // finally-block close is a no-op on the success path
        handlers.stream().forEach(H -> H.finish());
        out.flush();
        out.close();
        out = null;
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(out);
        CloserUtil.close(this.freqentDBs);
    }
}
Use of htsjdk.variant.vcf.VCFIterator in the project jvarkit by lindenb.
The method doVcfToVcf of the class GridssPostProcessVcf.
/**
 * Post-processes a GRIDSS VCF: pairs of breakend (BND) records sharing the
 * same INFO/EVENT id are rewritten as symbolic structural variants
 * (DEL/DUP/INV/SV, or CTX for inter-chromosomal pairs), then re-sorted in
 * VCF coordinate order and written out. Unpaired insertion-like records
 * are converted to INS.
 *
 * Fixes:
 *  - the new INFO header lines (BND, SVLEN, END) were added to the INPUT
 *    header while the OUTPUT header ({@code header2}) is what gets written,
 *    so the emitted VCF lacked those declarations; they are now added to
 *    {@code header2};
 *  - removed a duplicate header line that re-declared ID "BND" as Integer
 *    ("SV length"), clashing with the String declaration of the same ID.
 *
 * @param inputName name of the VCF input (not used here)
 * @param iterin iterator over the GRIDSS variants
 * @param out destination writer
 * @return 0 on success, -1 on error
 */
@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
    SortingCollection<VariantContext> sorter1 = null;
    SortingCollection<VariantContext> sorter2 = null;
    PrintWriter debug = null;
    try {
        // debug trace: a real file when requested, otherwise a null sink
        debug = this.debugFile == null ? new PrintWriter(new NullOuputStream()) : new PrintWriter(Files.newBufferedWriter(this.debugFile));
        final VCFHeader header = iterin.getHeader();
        LOG.info("reading input.");
        // group variants by INFO/EVENT (GRIDSS links breakend mates this way)
        final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(EVENT_KEY, "").compareTo(B.getAttributeAsString(EVENT_KEY, ""));
        sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (iterin.hasNext()) {
            final VariantContext ctx = iterin.next();
            if (!ctx.hasAttribute(EVENT_KEY)) {
                LOG.warn("skipping variant without INFO/" + EVENT_KEY + " at " + toString(ctx));
                continue;
            }
            sorter1.add(ctx);
        }
        sorter1.doneAdding();
        sorter1.setDestructiveIteration(true);
        LOG.info("done adding");
        CloseableIterator<VariantContext> iter2 = sorter1.iterator();
        @SuppressWarnings("resource") final EqualRangeIterator<VariantContext> equal_range = new EqualRangeIterator<>(iter2, comparator);
        // second sorter restores standard VCF coordinate order for output
        sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (equal_range.hasNext()) {
            final List<VariantContext> variants = equal_range.next();
            if (variants.size() == 1) {
                // unpaired record
                final VariantContext ctx = variants.get(0);
                final Allele alt = ctx.getAlleles().get(1);
                // INSERTION LIKE. chr22:1234 A/.AAAACAAGGAG
                if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) < length(alt)) {
                    final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx);
                    vcb1.attribute(VCFConstants.SVTYPE, "INS");
                    vcb1.attribute("SVLEN", length(alt) - length(ctx.getReference()));
                    sorter2.add(vcb1.make());
                } else // STRANGE ? no ? change
                if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) == 1 && length(alt) == 1) {
                    // plain SNV-shaped record: pass through unchanged
                    sorter2.add(ctx);
                } else {
                    sorter2.add(ctx);
                    debug.println("ALONE " + toString(ctx));
                }
            } else if (variants.size() != 2) {
                // more than two records for one event: pass all through, log them
                for (final VariantContext ctx : variants) {
                    debug.println("SIZE>2 " + toString(ctx));
                    sorter2.add(ctx);
                }
            } else {
                // mated breakend pair
                final VariantContext ctx1 = variants.get(0);
                final VariantContext ctx2 = variants.get(1);
                final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
                final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
                if (brk1 == null || brk2 == null) {
                    // NOTE(review): aborting the whole run (-1) on one malformed pair
                    // looks harsh — the commented-out 'continue' suggests skipping was
                    // considered; kept as-is to preserve the exit-code contract.
                    debug.println("expected two bnd " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                    return -1;
                    // continue;
                }
                /* should be the same breakend */
                if (!ctx1.getContig().equals(brk2.getContig()) || !ctx2.getContig().equals(brk1.getContig()) || ctx1.getStart() != brk2.getStart() || ctx2.getStart() != brk1.getStart()) {
                    // NOTE(review): same abort-vs-skip question as above.
                    debug.println("expected concordant bnd " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                    return -1;
                    // continue;
                }
                final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
                final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
                // inter-chromosomal pair: mark both sides as CTX (chromosomal translocation)
                if (!ctx1.contigsMatch(ctx2)) {
                    vcb1.attribute(VCFConstants.SVTYPE, "CTX");
                    vcb2.attribute(VCFConstants.SVTYPE, "CTX");
                    sorter2.add(vcb1.make());
                    sorter2.add(vcb2.make());
                    continue;
                }
                if (ctx1.getStart() > ctx2.getStart()) {
                    // unexpected ordering within the pair: keep both, log it
                    debug.println("CTX1>CTX2?" + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(vcb1.make());
                    sorter2.add(vcb2.make());
                    continue;
                }
                // orientation of each breakend decides the symbolic SV type
                final BndType bndType1 = getBndType(brk1);
                final BndType bndType2 = getBndType(brk2);
                if (bndType1.equals(BndType.DNA_OPEN) && bndType2.equals(BndType.CLOSE_DNA)) {
                    // deletion: left side opens, right side closes
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DEL>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "DEL");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx1.getEnd() - ctx2.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.CLOSE_DNA) && bndType2.equals(BndType.DNA_OPEN)) {
                    // duplication
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DUP>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "DUP");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.OPEN_DNA) && bndType2.equals(BndType.OPEN_DNA)) {
                    // inversion
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<INV>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "INV");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else if (bndType1.equals(BndType.DNA_CLOSE) && bndType2.equals(BndType.DNA_CLOSE)) {
                    // generic structural variant
                    final Allele ctx1_alt = ctx1.getAlleles().get(1);
                    final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<SV>", false));
                    vcb1.attribute(VCFConstants.SVTYPE, "SV");
                    vcb1.alleles(alleles);
                    vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
                    vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
                    sorter2.add(vcb1.make());
                } else {
                    // unrecognized orientation combination: pass both through, log it
                    debug.println("How to handle " + toString(ctx1) + " " + toString(ctx2));
                    sorter2.add(ctx1);
                    sorter2.add(ctx2);
                }
            }
        }
        equal_range.close();
        iter2.close();
        sorter1.cleanup();
        sorter1 = null;
        sorter2.doneAdding();
        sorter2.setDestructiveIteration(true);
        iter2 = sorter2.iterator();
        final VCFHeader header2 = new VCFHeader(header);
        JVarkitVersion.getInstance().addMetaData(this, header2);
        final VCFInfoHeaderLine originalAlt = new VCFInfoHeaderLine("BND", 1, VCFHeaderLineType.String, "Original ALT allele.");
        // FIX: declare the new INFO lines on the output header (header2), which is
        // the one actually written below; they were previously added to the input
        // header and never emitted. The duplicate Integer re-declaration of "BND"
        // has been dropped.
        header2.addMetaDataLine(originalAlt);
        header2.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
        header2.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
        out.writeHeader(header2);
        while (iter2.hasNext()) {
            out.add(iter2.next());
        }
        iter2.close();
        sorter2.cleanup();
        sorter2 = null;
        debug.flush();
        debug.close();
        debug = null;
        return 0;
    } catch (final Throwable err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
        // clean up whatever a failure path left behind
        if (sorter1 != null)
            sorter1.cleanup();
        if (sorter2 != null)
            sorter2.cleanup();
        if (debug != null)
            try {
                debug.close();
            } catch (final Throwable err2) {
                // best-effort close of the debug stream
            }
    }
}
Aggregations