use of com.github.lindenb.jvarkit.variant.variantcontext.Breakend in project jvarkit by lindenb.
the class VcfPostProcessSV method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
SortingCollection<VariantContext> sorter1 = null;
SortingCollection<VariantContext> sorter2 = null;
try {
final Counter<String> counter = new Counter<>();
if (StringUtils.isBlank(this.keys)) {
LOG.error("empty --keys");
return -1;
}
if (StringUtils.isBlank(this.newAlternateSymbol)) {
LOG.error("empty --allele");
return -1;
}
final VCFHeader header = iterin.getHeader();
if (header.getInfoHeaderLine(VCFConstants.SVTYPE) == null) {
header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Type of structural variant"));
}
if (header.getInfoHeaderLine("SVLEN") == null) {
header.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Difference in length between REF and ALT alleles"));
}
if (header.getInfoHeaderLine(VCFConstants.END_KEY) == null) {
header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "End position of the variant described in this record"));
}
final String mateID = Arrays.stream(CharSplitter.COMMA.split(this.keys)).map(S -> S.trim()).filter(S -> !StringUtils.isBlank(S)).map(S -> header.getInfoHeaderLine(S)).filter(H -> H != null && H.getType().equals(VCFHeaderLineType.String)).map(H -> H.getID()).findFirst().orElse(null);
if (StringUtils.isBlank(mateID)) {
LOG.error("Cannot find INFO for mate-id (was " + this.keys + ")");
return -1;
}
LOG.info("Mate key is '" + mateID + "'. Reading input.");
final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(mateID, "").compareTo(B.getAttributeAsString(mateID, ""));
sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
while (iterin.hasNext()) {
final VariantContext ctx = iterin.next();
if (ctx.hasAttribute(mateID) && (!ctx.hasAttribute(VCFConstants.SVTYPE) || ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))) {
sorter1.add(ctx);
} else {
counter.incr("Not a BND variant");
sorter2.add(ctx);
}
}
sorter1.doneAdding();
sorter1.setDestructiveIteration(true);
CloseableIterator<VariantContext> iter2 = sorter1.iterator();
PeekIterator<VariantContext> peek = new PeekIterator<>(iter2);
while (peek.hasNext()) {
final VariantContext first = peek.next();
final List<VariantContext> variants = new ArrayList<>();
variants.add(first);
while (peek.hasNext()) {
final VariantContext second = peek.peek();
if (first.hasID() && first.getID().equals(second.getAttributeAsString(mateID, ""))) {
variants.add(peek.next());
} else if (second.hasID() && second.getID().equals(first.getAttributeAsString(mateID, ""))) {
variants.add(peek.next());
} else if (first.getAttributeAsString(mateID, "").equals(second.getAttributeAsString(mateID, ""))) {
variants.add(peek.next());
} else {
break;
}
}
if (variants.size() != 2) {
counter.incr("Not a pair of Mate (" + variants.size() + ")", variants.size());
for (final VariantContext ctx : variants) {
sorter2.add(ctx);
}
continue;
}
Collections.sort(variants, (A, B) -> Integer.compare(A.getStart(), B.getStart()));
final VariantContext ctx1 = variants.get(0);
final VariantContext ctx2 = variants.get(1);
if (!(ctx1.getNAlleles() == 2 && ctx1.getNAlleles() == 2)) {
counter.incr("expected two alleles.", 2L);
sorter2.add(ctx1);
sorter2.add(ctx2);
continue;
}
final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
if (brk1 == null || brk2 == null) {
counter.incr("ALT is not breakend.", 2L);
sorter2.add(ctx1);
sorter2.add(ctx2);
}
/* should be the same breakend
difference can be large use CIPOS / CIEND ?
if( !ctx1.getContig().equals(brk2.getContig()) ||
!ctx2.getContig().equals(brk1.getContig()) ||
Math.abs(ctx1.getStart()-brk2.getStart())>1 ||
Math.abs(ctx2.getStart()-brk1.getStart())>1) {
counter.incr("mate is not the same position.",2L);
sorter2.add(ctx1);
sorter2.add(ctx2);
continue;
}
*/
final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
if (!ctx1.contigsMatch(ctx2)) {
vcb1.attribute(VCFConstants.SVTYPE, "TRANSLOC");
vcb2.attribute(VCFConstants.SVTYPE, "TRANSLOC");
sorter2.add(vcb1.make());
sorter2.add(vcb2.make());
counter.incr("translocation.", 2L);
continue;
}
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<" + this.newAlternateSymbol.trim() + ">", false));
vcb1.attribute(VCFConstants.SVTYPE, this.newAlternateSymbol.trim());
vcb1.alleles(alleles);
final int ctx_end = Math.max(ctx1.getEnd(), ctx2.getEnd());
vcb1.stop(ctx_end);
vcb1.attribute("END", ctx_end);
vcb1.attribute("SVLEN", CoordMath.getLength(ctx1.getStart(), ctx_end));
vcb1.rmAttribute(mateID);
vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
sorter2.add(vcb1.make());
counter.incr("Two BND variants converted.", 2L);
}
iter2.close();
sorter1.cleanup();
sorter1 = null;
sorter2.doneAdding();
sorter2.setDestructiveIteration(true);
iter2 = sorter2.iterator();
final VCFHeader header2 = new VCFHeader(header);
JVarkitVersion.getInstance().addMetaData(this, header2);
out.writeHeader(header2);
while (iter2.hasNext()) {
out.add(iter2.next());
}
iter2.close();
sorter2.cleanup();
sorter2 = null;
if (!disable_summary) {
LOG.info("SUMMARY COUNT");
for (final String key : counter.keySet()) {
LOG.info("\t" + key + " : " + counter.count(key));
}
}
return 0;
} catch (final Throwable err) {
err.printStackTrace();
LOG.error(err);
return -1;
} finally {
if (sorter1 != null)
sorter1.cleanup();
if (sorter2 != null)
sorter2.cleanup();
}
}
use of com.github.lindenb.jvarkit.variant.variantcontext.Breakend in project jvarkit by lindenb.
the class GridssPostProcessVcf method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
SortingCollection<VariantContext> sorter1 = null;
SortingCollection<VariantContext> sorter2 = null;
PrintWriter debug = null;
try {
debug = this.debugFile == null ? new PrintWriter(new NullOuputStream()) : new PrintWriter(Files.newBufferedWriter(this.debugFile));
final VCFHeader header = iterin.getHeader();
LOG.info("reading input.");
final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(EVENT_KEY, "").compareTo(B.getAttributeAsString(EVENT_KEY, ""));
sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
while (iterin.hasNext()) {
final VariantContext ctx = iterin.next();
if (!ctx.hasAttribute(EVENT_KEY)) {
LOG.warn("skipping variant without INFO/" + EVENT_KEY + " at " + toString(ctx));
continue;
}
sorter1.add(ctx);
}
sorter1.doneAdding();
sorter1.setDestructiveIteration(true);
LOG.info("done adding");
CloseableIterator<VariantContext> iter2 = sorter1.iterator();
@SuppressWarnings("resource") final EqualRangeIterator<VariantContext> equal_range = new EqualRangeIterator<>(iter2, comparator);
sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
while (equal_range.hasNext()) {
final List<VariantContext> variants = equal_range.next();
if (variants.size() == 1) {
final VariantContext ctx = variants.get(0);
final Allele alt = ctx.getAlleles().get(1);
// INSERTION LIKE. chr22:1234 A/.AAAACAAGGAG
if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) < length(alt)) {
final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx);
vcb1.attribute(VCFConstants.SVTYPE, "INS");
vcb1.attribute("SVLEN", length(alt) - length(ctx.getReference()));
sorter2.add(vcb1.make());
} else // STRANGE ? no ? change
if (isATGC(ctx.getReference()) && isATGC(alt) && length(ctx.getReference()) == 1 && length(alt) == 1) {
sorter2.add(ctx);
} else {
sorter2.add(ctx);
debug.println("ALONE " + toString(ctx));
}
} else if (variants.size() != 2) {
for (final VariantContext ctx : variants) {
debug.println("SIZE>2 " + toString(ctx));
sorter2.add(ctx);
}
} else {
final VariantContext ctx1 = variants.get(0);
final VariantContext ctx2 = variants.get(1);
final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
if (brk1 == null || brk2 == null) {
debug.println("expected two bnd " + toString(ctx1) + " " + toString(ctx2));
sorter2.add(ctx1);
sorter2.add(ctx2);
return -1;
// continue;
}
/* should be the same breakend */
if (!ctx1.getContig().equals(brk2.getContig()) || !ctx2.getContig().equals(brk1.getContig()) || ctx1.getStart() != brk2.getStart() || ctx2.getStart() != brk1.getStart()) {
debug.println("expected concordant bnd " + toString(ctx1) + " " + toString(ctx2));
sorter2.add(ctx1);
sorter2.add(ctx2);
return -1;
// continue;
}
final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
if (!ctx1.contigsMatch(ctx2)) {
vcb1.attribute(VCFConstants.SVTYPE, "CTX");
vcb2.attribute(VCFConstants.SVTYPE, "CTX");
sorter2.add(vcb1.make());
sorter2.add(vcb2.make());
continue;
}
if (ctx1.getStart() > ctx2.getStart()) {
debug.println("CTX1>CTX2?" + toString(ctx1) + " " + toString(ctx2));
sorter2.add(vcb1.make());
sorter2.add(vcb2.make());
continue;
}
final BndType bndType1 = getBndType(brk1);
final BndType bndType2 = getBndType(brk2);
if (bndType1.equals(BndType.DNA_OPEN) && bndType2.equals(BndType.CLOSE_DNA)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DEL>", false));
vcb1.attribute(VCFConstants.SVTYPE, "DEL");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx1.getEnd() - ctx2.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else if (bndType1.equals(BndType.CLOSE_DNA) && bndType2.equals(BndType.DNA_OPEN)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<DUP>", false));
vcb1.attribute(VCFConstants.SVTYPE, "DUP");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else if (bndType1.equals(BndType.OPEN_DNA) && bndType2.equals(BndType.OPEN_DNA)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<INV>", false));
vcb1.attribute(VCFConstants.SVTYPE, "INV");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else if (bndType1.equals(BndType.DNA_CLOSE) && bndType2.equals(BndType.DNA_CLOSE)) {
final Allele ctx1_alt = ctx1.getAlleles().get(1);
final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<SV>", false));
vcb1.attribute(VCFConstants.SVTYPE, "SV");
vcb1.alleles(alleles);
vcb1.attribute("SVLEN", ctx2.getEnd() - ctx1.getStart() + 1);
vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
sorter2.add(vcb1.make());
} else {
debug.println("How to handle " + toString(ctx1) + " " + toString(ctx2));
sorter2.add(ctx1);
sorter2.add(ctx2);
}
}
}
equal_range.close();
iter2.close();
sorter1.cleanup();
sorter1 = null;
sorter2.doneAdding();
sorter2.setDestructiveIteration(true);
iter2 = sorter2.iterator();
final VCFHeader header2 = new VCFHeader(header);
JVarkitVersion.getInstance().addMetaData(this, header2);
final VCFInfoHeaderLine originalAlt = new VCFInfoHeaderLine("BND", 1, VCFHeaderLineType.String, "Original ALT allele.");
header.addMetaDataLine(originalAlt);
header.addMetaDataLine(new VCFInfoHeaderLine(originalAlt.getID(), 1, VCFHeaderLineType.Integer, "SV length"));
header.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "SV length"));
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
out.writeHeader(header2);
while (iter2.hasNext()) {
out.add(iter2.next());
}
iter2.close();
sorter2.cleanup();
sorter2 = null;
debug.flush();
debug.close();
debug = null;
return 0;
} catch (final Throwable err) {
err.printStackTrace();
LOG.error(err);
return -1;
} finally {
if (sorter1 != null)
sorter1.cleanup();
if (sorter2 != null)
sorter2.cleanup();
if (debug != null)
try {
debug.close();
} catch (final Throwable err2) {
}
}
}
Aggregations