Example of htsjdk.variant.vcf.VCFIterator usage in the jvarkit project by lindenb:
the doVcfToVcf method of class VcfGnomadOld.
/**
 * Re-annotates each variant of the input VCF with INFO attributes (AC/AN/AF and
 * related sub-population counts) and FILTERs copied from the gnomAD exome/genome
 * files declared in {@code this.manifestEntries}.
 *
 * @param inputName name of the input source (unused here; kept for the base-class contract)
 * @param iter iterator over the input variants
 * @param out writer receiving the annotated variants
 * @return 0 on success, -1 on error (inconsistent gnomAD values or unhandled INFO count type)
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
final VCFHeader h0 = iter.getHeader();
// gnomAD 2.x is GRCh37-based: warn (but do not fail) on other builds.
if (!SequenceDictionaryUtils.isGRCh37(h0)) {
LOG.warn("Input is NOT GRCh37 ?");
// can be lift over
}
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(h0).logger(LOG).build();
final VCFHeader h2 = new VCFHeader(h0);
// every FILTER id this tool may inject; cleared from each variant before re-annotation
final Set<String> all_filters_from_gnomad = new HashSet<>();
// INFO fields to import, collected from the gnomAD headers below
final List<InfoField> infoFields = new ArrayList<>();
// scan the gnomAD headers once per ome (exome, genome) to build the output header
for (final OmeType ome : OmeType.values()) {
final ManifestEntry entry = this.manifestEntries.stream().filter(M -> M.omeType.equals(ome)).findFirst().orElse(null);
if (entry == null)
continue;
entry.open();
final VCFHeader header = entry.getHeader();
// heuristic version detection: the INFO tag 'non_neuro_AC_nfe_male' only exists in v2.1
this.gnomadVersion = header.getInfoHeaderLines().stream().anyMatch(V -> V.getID().equals("non_neuro_AC_nfe_male")) ? GnomadVersion.v2_1 : GnomadVersion.v2_0;
LOG.debug("identified as gnomad version " + this.gnomadVersion);
if (!StringUtil.isBlank(this.filteredInGnomadFilterPrefix)) {
// declare one output FILTER per gnomAD FILTER (except PASS): prefix + upper-cased ome + id
for (final VCFFilterHeaderLine fh : header.getFilterLines()) {
if (fh.getID().equals(VCFConstants.PASSES_FILTERS_v4))
continue;
final String fid = this.filteredInGnomadFilterPrefix + "_" + ome.name().toUpperCase() + "_" + fh.getID();
final VCFFilterHeaderLine fh2 = new VCFFilterHeaderLine(fid, "[gnomad-" + ome.name() + "]" + fh.getDescription());
h2.addMetaDataLine(fh2);
all_filters_from_gnomad.add(fid);
}
}
// optional user-supplied regex excluding INFO tags from the annotation
final Predicate<VCFInfoHeaderLine> acceptInfoTag;
if (StringUtil.isBlank(this.excludePatternStr)) {
acceptInfoTag = T -> true;
} else {
final Pattern pat = Pattern.compile(this.excludePatternStr);
acceptInfoTag = T -> !pat.matcher(T.getID()).find();
}
// select which gnomAD INFO tags to import, depending on the detected version
switch(this.gnomadVersion) {
case v2_0:
header.getInfoHeaderLines().stream().filter(acceptInfoTag).filter(FH -> FH.getID().equals("AC") || FH.getID().equals("AN") || FH.getID().equals("AF") || FH.getID().startsWith("AC_") || FH.getID().startsWith("AN_") || FH.getID().startsWith("AF_")).map(FH -> new InfoField(FH, ome)).forEach(FH -> infoFields.add(FH));
break;
case v2_1:
header.getInfoHeaderLines().stream().filter(acceptInfoTag).filter(FH -> !FH.getID().contains("MEDIAN")).filter(FH -> FH.getID().contains("AC") || FH.getID().contains("AN") || FH.getID().contains("AF")).map(FH -> new InfoField(FH, ome)).forEach(FH -> infoFields.add(FH));
break;
default:
throw new IllegalStateException("TODO " + this.gnomadVersion);
}
entry.close();
}
for (final InfoField f : infoFields) h2.addMetaDataLine(f.geOutputHeaderLine());
if (!StringUtil.isBlank(this.inGnomadFilterName)) {
h2.addMetaDataLine(new VCFFilterHeaderLine(this.inGnomadFilterName, "Variant CHROM/POS/REF was found in gnomad"));
}
if (!StringUtil.isBlank(this.overlapGnomadFilterName)) {
h2.addMetaDataLine(new VCFFilterHeaderLine(this.overlapGnomadFilterName, "Gnomad Variant was found overlapping the variant, not necessarily at the same CHROM/POS"));
}
JVarkitVersion.getInstance().addMetaData(this, h2);
out.writeHeader(h2);
// currently-opened manifest entry per ome: [0]=exome, [1]=genome
final ManifestEntry[] om2manifest = new ManifestEntry[] { null, null };
while (iter.hasNext()) {
final VariantContext ctx = progress.apply(iter.next());
final Set<String> filters = new HashSet<>(ctx.getFilters());
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// strip any attribute/FILTER a previous run of this tool may have set
for (final InfoField f : infoFields) vcb.rmAttribute(f.getOutputTag());
if (!StringUtil.isBlank(this.inGnomadFilterName)) {
filters.remove(this.inGnomadFilterName);
}
filters.removeAll(all_filters_from_gnomad);
if (!StringUtil.isBlank(this.overlapGnomadFilterName)) {
filters.remove(this.overlapGnomadFilterName);
}
// optionally pass filtered variants through untouched (apart from filter cleanup above)
if (this.skipFiltered && ctx.isFiltered()) {
vcb.filters(filters);
out.add(vcb.make());
continue;
}
// skip mitochondrial / alt contigs, which are not annotated here
if (ctx.getContig().equals("MT") || ctx.getContig().equals("chrM") || ctx.getContig().contains("_")) {
vcb.filters(filters);
out.add(vcb.make());
continue;
}
final List<Allele> alternateAlleles = ctx.getAlternateAlleles();
// ID copied from the first matching gnomAD variant carrying one, if any
String newid = null;
boolean set_filter_ctx_is_in_gnomad = false;
boolean found_gnomad_overlapping_variant = false;
for (int omeIndex = 0; omeIndex < 2; omeIndex++) {
final OmeType omeType = omeIndex == 0 ? OmeType.exome : OmeType.genome;
if (this.useGenomeOnly && !omeType.equals(OmeType.genome))
continue;
// lazily (re-)open the manifest entry covering the current contig
if (om2manifest[omeIndex] != null && !om2manifest[omeIndex].acceptContig(ctx.getContig())) {
om2manifest[omeIndex].close();
om2manifest[omeIndex] = null;
}
if (om2manifest[omeIndex] == null) {
om2manifest[omeIndex] = this.manifestEntries.stream().filter(M -> M.omeType.equals(omeType) && M.acceptContig(ctx.getContig())).findFirst().orElse(null);
if (om2manifest[omeIndex] == null)
continue;
LOG.debug("Opening " + om2manifest[omeIndex].uri);
om2manifest[omeIndex].open();
}
// variant overlapping 'ctx'
final List<VariantContext> overlappingVariants = om2manifest[omeIndex].findOverlapping(ctx);
if (!overlappingVariants.isEmpty())
found_gnomad_overlapping_variant = true;
// exact candidates: same start AND same REF allele
final List<VariantContext> gnomadVariants = overlappingVariants.stream().filter(V -> V.getStart() == ctx.getStart() && V.getReference().equals(ctx.getReference())).collect(Collectors.toList());
if (!gnomadVariants.isEmpty()) {
set_filter_ctx_is_in_gnomad = true;
// set new id ?
if (newid == null) {
newid = gnomadVariants.stream().filter(V -> V.hasID()).map(V -> V.getID()).findFirst().orElse(null);
}
// add FILTER(s)
if (!StringUtil.isBlank(this.filteredInGnomadFilterPrefix)) {
filters.addAll(gnomadVariants.stream().filter(V -> V.isFiltered()).flatMap(V -> V.getFilters().stream()).filter(F -> !F.equals(VCFConstants.PASSES_FILTERS_v4)).map(F -> this.filteredInGnomadFilterPrefix + "_" + omeType.name().toUpperCase() + "_" + F).collect(Collectors.toList()));
}
}
// loop over each field
for (final InfoField infoField : infoFields) {
if (!infoField.ome.equals(omeType))
continue;
if (gnomadVariants.stream().noneMatch(V -> V.hasAttribute(infoField.original.getID())))
continue;
// fixed-count (Number=<n>) fields: a single value expected across matching gnomAD variants
if (infoField.getOutputLineCount().equals(VCFHeaderLineCount.INTEGER) && infoField.original.getCountType().equals(VCFHeaderLineCount.INTEGER)) {
final Set<Object> set = gnomadVariants.stream().filter(V -> V.hasAttribute(infoField.original.getID())).map(V -> infoField.parse(V.getAttribute(infoField.original.getID()))).collect(Collectors.toSet());
if (set.isEmpty())
continue;
if (set.size() == 1) {
vcb.attribute(infoField.getOutputTag(), set.iterator().next());
} else if (this.ignore_info_found_twice) {
// conflicting values: keep an arbitrary one (set iteration order) and warn
LOG.warn("Found more than one value (" + set + ") for " + infoField + " " + ctx.getContig() + ":" + ctx.getStart());
vcb.attribute(infoField.getOutputTag(), set.iterator().next());
} else {
LOG.error("Found more than one value (" + set + ") for " + infoField + " " + ctx.getContig() + ":" + ctx.getStart() + ". Are you using a lift-overed gnomad ? Use option --ignore-error0 to skip this error");
progress.close();
return -1;
}
// per-ALT (Number=A) fields: align each gnomAD value on the matching ALT allele
} else if (infoField.original.getCountType() == VCFHeaderLineCount.A) {
final Object[] numbers = new Object[alternateAlleles.size()];
Arrays.fill(numbers, infoField.getDefault());
for (int x = 0; x < alternateAlleles.size(); ++x) {
final Allele alt = alternateAlleles.get(x);
if (alt.equals(Allele.SPAN_DEL))
continue;
for (final VariantContext gv : gnomadVariants) {
int idx = gv.getAlternateAlleles().indexOf(alt);
if (idx == -1)
continue;
if (!gv.hasAttribute(infoField.original.getID()))
continue;
final List<Object> array = gv.getAttributeAsList(infoField.original.getID());
if (idx >= array.size())
continue;
numbers[x] = infoField.parse(array.get(idx));
}
}
vcb.attribute(infoField.getOutputTag(), numbers);
// scalar (Number=1) fields: expand to one value per ALT of the input variant
} else if (infoField.original.isFixedCount() && infoField.original.getCount() == 1) {
final Object[] numbers = new Object[alternateAlleles.size()];
Arrays.fill(numbers, infoField.getDefault());
boolean something_found_for_an_allele = false;
for (int x = 0; x < alternateAlleles.size(); ++x) {
final Allele alt = alternateAlleles.get(x);
if (alt.equals(Allele.SPAN_DEL))
continue;
for (final VariantContext gv : gnomadVariants) {
int idx = gv.getAlternateAlleles().indexOf(alt);
if (idx == -1)
continue;
if (!gv.hasAttribute(infoField.original.getID()))
continue;
something_found_for_an_allele = true;
final Object val = infoField.parse(gv.getAttribute(infoField.original.getID()));
if (val == null)
continue;
numbers[x] = val;
}
}
if (something_found_for_an_allele) {
vcb.attribute(infoField.getOutputTag(), numbers);
}
} else {
LOG.error("not handled " + infoField);
progress.close();
return -1;
}
}
}
if (set_filter_ctx_is_in_gnomad && !StringUtil.isBlank(this.inGnomadFilterName)) {
filters.add(this.inGnomadFilterName);
}
if (found_gnomad_overlapping_variant && !StringUtil.isBlank(this.overlapGnomadFilterName)) {
filters.add(this.overlapGnomadFilterName);
}
if (!this.doNotUpdateId && !ctx.hasID() && !StringUtil.isBlank(newid))
vcb.id(newid);
vcb.filters(filters);
out.add(vcb.make());
}
out.close();
progress.close();
for (int omeIndex = 0; omeIndex < 2; omeIndex++) CloserUtil.close(om2manifest[omeIndex]);
return 0;
}
Example of htsjdk.variant.vcf.VCFIterator usage in the jvarkit project by lindenb:
the doWork method of class IranomeScrapper.
/**
 * Reads variants from the input VCF, queries the iranome web service for each
 * ATGC REF/ALT pair, and writes one tab-delimited line per fetched variant.
 * Variants already present in the optional local tabix database are skipped;
 * variants that could not be fetched are optionally written to a "failed" VCF.
 *
 * @param args command-line arguments; at most one input VCF (stdin when empty)
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
    TabixReader tabixReader = null;
    VariantContextWriter failedWriter = null;
    try {
        /**
         * create http client
         */
        // createDefault();
        this.httpClient = HttpClients.createSystem();
        try (VCFIterator iter = super.openVCFIterator(oneFileOrNull(args))) {
            // optional local tabix database of variants already fetched in a previous run
            if (this.tabixDatabase != null) {
                tabixReader = new TabixReader(this.tabixDatabase);
            }
            // optional VCF sink receiving the variants the web service could not resolve
            if (this.failedVcfPath != null) {
                failedWriter = VCFUtils.createVariantContextWriterToPath(failedVcfPath);
                failedWriter.writeHeader(iter.getHeader());
            }
            try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
                // header line: fixed columns, then AC_* and AN_* per population, then date
                pw.print("CHROM\tPOS\tREF\tALT\tAC\tAN\tAF");
                for (int i = 0; i < populations.size(); i++) {
                    pw.print("\tAC_" + populations.get(i).toUpperCase().replace(' ', '_'));
                }
                for (int i = 0; i < populations.size(); i++) {
                    pw.print("\tAN_" + populations.get(i).toUpperCase().replace(' ', '_'));
                }
                pw.println("\tdate");
                while (iter.hasNext()) {
                    final VariantContext ctx = iter.next();
                    if (!AcidNucleics.isATGC(ctx.getReference()))
                        continue;
                    final String iranomeCtg = toIranomeContig(ctx.getContig());
                    for (final Allele alt : ctx.getAlternateAlleles()) {
                        if (!AcidNucleics.isATGC(alt))
                            continue;
                        if (tabixReader != null) {
                            boolean found = false;
                            final TabixReader.Iterator iter2 = tabixReader.query(iranomeCtg, ctx.getStart(), ctx.getEnd());
                            for (; ; ) {
                                final String line2 = iter2.next();
                                if (line2 == null)
                                    break;
                                final String[] tokens = CharSplitter.TAB.split(line2);
                                if (tokens.length < 4)
                                    throw new JvarkitException.TokenErrors(4, tokens);
                                if (tokens[0].equals(iranomeCtg) && Integer.parseInt(tokens[1]) == ctx.getStart() && tokens[2].equals(ctx.getReference().getDisplayString()) && tokens[3].equals(alt.getDisplayString())) {
                                    LOG.info("already in database " + String.join("-", Arrays.asList(tokens).subList(0, 4)));
                                    found = true;
                                    break;
                                }
                            }
                            // end tabix iterator
                            // BUGFIX: skip an ALT already recorded in the local database instead of
                            // re-querying the web service. (The original tested 'found' inside the
                            // loop above, where it was unreachable dead code.)
                            if (found)
                                continue;
                        }
                        // end tabix!=null
                        // query the iranome web service for this REF/ALT pair
                        final Map<String, Object> hash = this.getIranomeVariant(ctx, alt);
                        if (hash == null) {
                            if (failedWriter != null) {
                                failedWriter.add(ctx);
                            }
                            continue;
                        }
                        for (int i = 0; i < fields.size(); i++) {
                            pw.print(i > 0 ? "\t" : "");
                            pw.print(hash.get(fields.get(i)));
                        }
                        for (int i = 0; i < populations.size(); i++) {
                            pw.print("\t" + hash.get("AC_" + populations.get(i)));
                        }
                        for (int i = 0; i < populations.size(); i++) {
                            pw.print("\t" + hash.get("AN_" + populations.get(i)));
                        }
                        pw.print("\t");
                        pw.println(StringUtils.now());
                    }
                }
                pw.flush();
            }
            // end print
        }
        // end iter
        return 0;
    } catch (final Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(tabixReader);
        CloserUtil.close(failedWriter);
        CloserUtil.close(this.httpClient);
    }
}
Example of htsjdk.variant.vcf.VCFIterator usage in the jvarkit project by lindenb:
the doWork method of class VcfFisherCombinatorics.
/**
 * Searches for the combination of genes (BED intervals) that best separates
 * affected (cases) from unaffected (controls) samples, based on which samples
 * carry a HET/HOM_VAR genotype overlapping each gene.
 *
 * @param args command-line arguments; at most one input VCF (stdin when empty)
 * @return 0 on success, -1 on error (no genotypes, empty pedigree groups, no gene)
 */
@Override
public int doWork(final List<String> args) {
try {
final Pedigree pedigree = new PedigreeParser().parse(this.pedigreeFile);
// genes indexed by interval; each interval may carry several GeneInfo records
final IntervalTreeMap<List<GeneInfo>> geneInfoMap = new IntervalTreeMap<>();
// a genotype "carries" the event if it is HET or HOM_VAR
final Predicate<Genotype> isGtCnv = G -> G.isHet() || G.isHomVar();
try (VCFIterator vcfIter = super.openVCFIterator(super.oneFileOrNull(args))) {
final VCFHeader header = vcfIter.getHeader();
final ContigNameConverter ctgConverter = ContigNameConverter.fromOneDictionary(SequenceDictionaryUtils.extractRequired(header));
if (!header.hasGenotypingData()) {
LOG.error("No Genotype data in " + header);
return -1;
}
// map sample name -> genotype column index, then resolve cases/controls to indexes
final Map<String, Integer> sample2index = header.getSampleNameToOffset();
final List<Integer> casesIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isAffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
LOG.info("cases N=" + casesIdx.size());
if (casesIdx.isEmpty()) {
LOG.error("No affected/cases sample in the input VCF");
return -1;
}
final List<Integer> controlsIdx = pedigree.getSamplesInVcfHeader(header).filter(S -> S.isUnaffected()).map(S -> S.getId()).map(S -> sample2index.get(S)).sorted().collect(Collectors.toList());
LOG.info("controls N=" + controlsIdx.size());
if (controlsIdx.isEmpty()) {
LOG.error("No unaffected/controls sample in the input VCF");
return -1;
}
// variant filter: discards BND/filtered variants and, when max_af<1, variants whose
// allele frequency (computed over cases+controls genotypes only) exceeds max_af
final Predicate<VariantContext> acceptCtx = V -> {
if (discard_bnd && V.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))
return false;
if (discard_filtered && V.isFiltered())
return false;
if (max_af < 1.0) {
final Iterator<Integer> it = Stream.concat(controlsIdx.stream(), casesIdx.stream()).iterator();
int ac = 0;
int an = 0;
while (it.hasNext()) {
// NOTE(review): NO_CALL/other genotype types contribute to neither ac nor an
switch(V.getGenotype(it.next()).getType()) {
case HOM_VAR:
ac += 2;
an += 2;
break;
case HOM_REF:
ac += 0;
an += 2;
break;
case HET:
ac += 1;
an += 2;
break;
default:
break;
}
}
if (an == 0)
return false;
if (ac / (double) an > max_af)
return false;
}
return true;
};
// load the genes from the BED file; column 4 (index 3) is the gene name
try (BedLineReader br = new BedLineReader(this.bedPath)) {
while (br.hasNext()) {
final BedLine bed = br.next();
final String ctg = ctgConverter.apply(bed.getContig());
if (StringUtil.isBlank(ctg))
continue;
final GeneInfo geneInfo = new GeneInfo();
// one bit per case/control sample: set when the sample carries a variant in the gene
geneInfo.casesflags = new BitSet(casesIdx.size());
geneInfo.controlsflags = new BitSet(controlsIdx.size());
geneInfo.contig = ctg;
geneInfo.name = bed.getOrDefault(3, "undefined");
geneInfo.parts.add(new SimpleInterval(bed).renameContig(ctg));
final Interval key = new Interval(geneInfo);
List<GeneInfo> L = geneInfoMap.get(key);
if (L == null) {
L = new ArrayList<>();
geneInfoMap.put(key, L);
}
L.add(geneInfo);
}
}
if (geneInfoMap.isEmpty()) {
LOG.error("no gene found in " + this.bedPath);
return -1;
}
LOG.info("reading variants...");
// single pass over the VCF: flag, for each overlapped gene, which samples carry a variant
while (vcfIter.hasNext()) {
final VariantContext ctx = vcfIter.next();
if (!acceptCtx.test(ctx))
continue;
for (List<GeneInfo> gil : geneInfoMap.getOverlapping(ctx)) {
for (GeneInfo gi : gil) {
for (int y = 0; y < casesIdx.size(); ++y) {
final Genotype gt = ctx.getGenotype(casesIdx.get(y));
if (!isGtCnv.test(gt))
continue;
gi.casesflags.set(y);
}
for (int y = 0; y < controlsIdx.size(); ++y) {
final Genotype gt = ctx.getGenotype(controlsIdx.get(y));
if (!isGtCnv.test(gt))
continue;
gi.controlsflags.set(y);
}
}
}
}
/* remove Genes without variant , count sample per gene*/
for (List<GeneInfo> gil : geneInfoMap.values()) {
gil.removeIf(GI -> GI.casesflags.nextSetBit(0) == -1 && GI.controlsflags.nextSetBit(0) == -1);
}
// end remove
/* load bed file */
final List<GeneInfo> geneList = geneInfoMap.values().stream().filter(L -> !L.isEmpty()).flatMap(L -> L.stream()).sorted().collect(Collectors.toList());
if (geneList.isEmpty()) {
LOG.error("no gene found in " + this.bedPath);
return -1;
}
LOG.info("N Genes=" + geneList.size());
// try combinations of increasing size, keeping the best solution found so far
Solution best = null;
// NOTE(review): the upper bound is exclusive, so combinations of exactly
// max_num_genes genes are never tried — confirm this is intended
for (int i = 2; i < Math.min(this.max_num_genes, geneList.size()); i++) {
LOG.info("sarting loop from " + i);
best = recursion(geneList, casesIdx, controlsIdx, new ArrayList<>(), i, best);
}
try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
pw.println(best);
pw.flush();
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
}
}
Example of htsjdk.variant.vcf.VCFIterator usage in the jvarkit project by lindenb:
the doVcfToVcf method of class VcfPostProcessSV.
/**
 * Converts pairs of mated BND (breakend) records into a single symbolic
 * structural variant (ALT = &lt;newAlternateSymbol&gt;), or flags cross-contig
 * pairs as TRANSLOC. Records are paired on the INFO mate-id tag (first match
 * among {@code this.keys}); unmatched or non-BND records pass through unchanged.
 * Output is re-sorted on the header's record comparator.
 *
 * @param inputName name of the input source (unused; base-class contract)
 * @param iterin iterator over the input variants
 * @param out writer receiving the post-processed variants
 * @return 0 on success, -1 on error
 */
@Override
protected int doVcfToVcf(String inputName, VCFIterator iterin, VariantContextWriter out) {
    SortingCollection<VariantContext> sorter1 = null;
    SortingCollection<VariantContext> sorter2 = null;
    try {
        final Counter<String> counter = new Counter<>();
        if (StringUtils.isBlank(this.keys)) {
            LOG.error("empty --keys");
            return -1;
        }
        if (StringUtils.isBlank(this.newAlternateSymbol)) {
            LOG.error("empty --allele");
            return -1;
        }
        final VCFHeader header = iterin.getHeader();
        // make sure the INFO lines the converted records use are declared
        if (header.getInfoHeaderLine(VCFConstants.SVTYPE) == null) {
            header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Type of structural variant"));
        }
        if (header.getInfoHeaderLine("SVLEN") == null) {
            header.addMetaDataLine(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Difference in length between REF and ALT alleles"));
        }
        if (header.getInfoHeaderLine(VCFConstants.END_KEY) == null) {
            header.addMetaDataLine(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "End position of the variant described in this record"));
        }
        // the first user-supplied key that is a declared String INFO field becomes the mate-id tag
        final String mateID = Arrays.stream(CharSplitter.COMMA.split(this.keys)).map(S -> S.trim()).filter(S -> !StringUtils.isBlank(S)).map(S -> header.getInfoHeaderLine(S)).filter(H -> H != null && H.getType().equals(VCFHeaderLineType.String)).map(H -> H.getID()).findFirst().orElse(null);
        if (StringUtils.isBlank(mateID)) {
            LOG.error("Cannot find INFO for mate-id (was " + this.keys + ")");
            return -1;
        }
        LOG.info("Mate key is '" + mateID + "'. Reading input.");
        // sorter1 groups BND records by their mate-id; sorter2 holds the final output order
        final Comparator<VariantContext> comparator = (A, B) -> A.getAttributeAsString(mateID, "").compareTo(B.getAttributeAsString(mateID, ""));
        sorter1 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sorter2 = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header, true), header.getVCFRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        while (iterin.hasNext()) {
            final VariantContext ctx = iterin.next();
            if (ctx.hasAttribute(mateID) && (!ctx.hasAttribute(VCFConstants.SVTYPE) || ctx.getAttributeAsString(VCFConstants.SVTYPE, "").equals("BND"))) {
                sorter1.add(ctx);
            } else {
                counter.incr("Not a BND variant");
                sorter2.add(ctx);
            }
        }
        sorter1.doneAdding();
        sorter1.setDestructiveIteration(true);
        CloseableIterator<VariantContext> iter2 = sorter1.iterator();
        PeekIterator<VariantContext> peek = new PeekIterator<>(iter2);
        while (peek.hasNext()) {
            final VariantContext first = peek.next();
            // gather all records mated with 'first' (by ID<->mateID in either direction, or same mateID)
            final List<VariantContext> variants = new ArrayList<>();
            variants.add(first);
            while (peek.hasNext()) {
                final VariantContext second = peek.peek();
                if (first.hasID() && first.getID().equals(second.getAttributeAsString(mateID, ""))) {
                    variants.add(peek.next());
                } else if (second.hasID() && second.getID().equals(first.getAttributeAsString(mateID, ""))) {
                    variants.add(peek.next());
                } else if (first.getAttributeAsString(mateID, "").equals(second.getAttributeAsString(mateID, ""))) {
                    variants.add(peek.next());
                } else {
                    break;
                }
            }
            if (variants.size() != 2) {
                counter.incr("Not a pair of Mate (" + variants.size() + ")", variants.size());
                for (final VariantContext ctx : variants) {
                    sorter2.add(ctx);
                }
                continue;
            }
            Collections.sort(variants, (A, B) -> Integer.compare(A.getStart(), B.getStart()));
            final VariantContext ctx1 = variants.get(0);
            final VariantContext ctx2 = variants.get(1);
            // BUGFIX: the original tested ctx1.getNAlleles() twice; the second operand must be ctx2
            if (!(ctx1.getNAlleles() == 2 && ctx2.getNAlleles() == 2)) {
                counter.incr("expected two alleles.", 2L);
                sorter2.add(ctx1);
                sorter2.add(ctx2);
                continue;
            }
            final Breakend brk1 = Breakend.parse(ctx1.getAlleles().get(1)).orElse(null);
            final Breakend brk2 = Breakend.parse(ctx2.getAlleles().get(1)).orElse(null);
            if (brk1 == null || brk2 == null) {
                counter.incr("ALT is not breakend.", 2L);
                sorter2.add(ctx1);
                sorter2.add(ctx2);
                // BUGFIX: without this 'continue' the pair was both passed through AND
                // converted below, duplicating the records in the output
                continue;
            }
            /* should be the same breakend
            difference can be large use CIPOS / CIEND ?
            if( !ctx1.getContig().equals(brk2.getContig()) ||
            !ctx2.getContig().equals(brk1.getContig()) ||
            Math.abs(ctx1.getStart()-brk2.getStart())>1 ||
            Math.abs(ctx2.getStart()-brk1.getStart())>1) {
            counter.incr("mate is not the same position.",2L);
            sorter2.add(ctx1);
            sorter2.add(ctx2);
            continue;
            }
            */
            final VariantContextBuilder vcb1 = new VariantContextBuilder(ctx1);
            final VariantContextBuilder vcb2 = new VariantContextBuilder(ctx2);
            // different contigs: keep both records but retype them as translocations
            if (!ctx1.contigsMatch(ctx2)) {
                vcb1.attribute(VCFConstants.SVTYPE, "TRANSLOC");
                vcb2.attribute(VCFConstants.SVTYPE, "TRANSLOC");
                sorter2.add(vcb1.make());
                sorter2.add(vcb2.make());
                counter.incr("translocation.", 2L);
                continue;
            }
            // same contig: collapse the pair into one symbolic SV spanning both breakends
            final Allele ctx1_alt = ctx1.getAlleles().get(1);
            final List<Allele> alleles = Arrays.asList(ctx1.getReference(), Allele.create("<" + this.newAlternateSymbol.trim() + ">", false));
            vcb1.attribute(VCFConstants.SVTYPE, this.newAlternateSymbol.trim());
            vcb1.alleles(alleles);
            final int ctx_end = Math.max(ctx1.getEnd(), ctx2.getEnd());
            vcb1.stop(ctx_end);
            vcb1.attribute("END", ctx_end);
            vcb1.attribute("SVLEN", CoordMath.getLength(ctx1.getStart(), ctx_end));
            vcb1.rmAttribute(mateID);
            // rewrite the genotypes so the old BND ALT is replaced by the new symbolic allele
            vcb1.genotypes(ctx1.getGenotypes().stream().map(GT -> new GenotypeBuilder(GT).alleles(GT.getAlleles().stream().map(A -> A.equals(ctx1_alt) ? alleles.get(1) : A).collect(Collectors.toList())).make()).collect(Collectors.toList()));
            sorter2.add(vcb1.make());
            counter.incr("Two BND variants converted.", 2L);
        }
        iter2.close();
        sorter1.cleanup();
        sorter1 = null;
        sorter2.doneAdding();
        sorter2.setDestructiveIteration(true);
        iter2 = sorter2.iterator();
        final VCFHeader header2 = new VCFHeader(header);
        JVarkitVersion.getInstance().addMetaData(this, header2);
        out.writeHeader(header2);
        while (iter2.hasNext()) {
            out.add(iter2.next());
        }
        iter2.close();
        sorter2.cleanup();
        sorter2 = null;
        if (!disable_summary) {
            LOG.info("SUMMARY COUNT");
            for (final String key : counter.keySet()) {
                LOG.info("\t" + key + " : " + counter.count(key));
            }
        }
        return 0;
    } catch (final Throwable err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    } finally {
        if (sorter1 != null)
            sorter1.cleanup();
        if (sorter2 != null)
            sorter2.cleanup();
    }
}
Example of htsjdk.variant.vcf.VCFIterator usage in the jvarkit project by lindenb:
the read method of class VcfToSql.
/**
 * Reads one VCF file and emits SQL INSERT statements (via the *Table helpers)
 * for its samples, filters, chromosomes, variants, alternate alleles, filters
 * per variant and genotypes.
 *
 * @param filename the VCF file to read; its header must carry a sequence dictionary
 * @throws IOException if a variant references a FILTER not declared in the header
 */
private void read(File filename) throws IOException {
    /* insert ATGC */
    this.alleleTable.insert(outputWriter, null, "A");
    this.alleleTable.insert(outputWriter, null, "C");
    this.alleleTable.insert(outputWriter, null, "G");
    this.alleleTable.insert(outputWriter, null, "T");
    /* insert this sample */
    this.vcfFileTable.insert(outputWriter, null, filename);
    final SelectStmt vcffile_id = new SelectStmt(this.vcfFileTable);
    // caches of sub-SELECTs resolving names to row ids
    final Map<String, SelectStmt> sample2sampleid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> filter2filterid = new HashMap<String, SelectStmt>();
    final Map<String, SelectStmt> chrom2chromId = new HashMap<String, SelectStmt>();
    // BUGFIX: try-with-resources so the iterator is also closed when an exception
    // is thrown mid-parse (the original only closed it on the success path)
    try (final VCFIterator r = VCFUtils.createVCFIteratorFromFile(filename)) {
        final VCFHeader header = r.getHeader();
        /* parse samples */
        for (final String sampleName : header.getSampleNamesInOrder()) {
            this.sampleTable.insert(outputWriter, null, sampleName);
            SelectStmt sample_id = new SelectStmt(this.sampleTable, "name", sampleName);
            sample2sampleid.put(sampleName, sample_id);
            this.sample2fileTable.insert(outputWriter, null, vcffile_id, sample_id);
        }
        /* parse filters */
        for (final VCFFilterHeaderLine filter : header.getFilterLines()) {
            this.filterTable.insert(outputWriter, null, vcffile_id, filter.getID(), filter.getValue());
            filter2filterid.put(filter.getID(), new SelectStmt(this.filterTable, "name", filter.getID()));
        }
        // PASS is implicit and never declared as a FILTER line; register it explicitly
        filter2filterid.put(VCFConstants.PASSES_FILTERS_v4, new SelectStmt(this.filterTable, "name", VCFConstants.PASSES_FILTERS_v4));
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            throw new RuntimeException("dictionary missing in VCF");
        }
        /* parse sequence dict */
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            this.chromosomeTable.insert(outputWriter, null, vcffile_id, ssr.getSequenceName(), ssr.getSequenceLength());
            chrom2chromId.put(ssr.getSequenceName(), new SelectStmt(this.chromosomeTable, "name", ssr.getSequenceName()));
        }
        VepPredictionParser vepPredictionParser = new VepPredictionParserFactory(header).get();
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        // 1-based index of the variant inside this file, stored with each variant row
        int nVariants = 0;
        while (r.hasNext()) {
            // stop early if the SQL output stream is broken
            if (this.outputWriter.checkError())
                break;
            VariantContext var = progress.watch(r.next());
            ++nVariants;
            /* insert ref allele */
            this.alleleTable.insert(outputWriter, null, var.getReference().getBaseString());
            /* insert variant */
            this.variantTable.insert(outputWriter, null, vcffile_id, nVariants, chrom2chromId.get(var.getContig()), var.getStart(), (var.hasID() ? var.getID() : null), new SelectStmt(this.alleleTable, "bases", var.getReference().getBaseString()), (var.hasLog10PError() ? var.getPhredScaledQual() : null));
            SelectStmt variant_id = new SelectStmt(variantTable);
            /* insert alternate alleles */
            for (Allele alt : var.getAlternateAlleles()) {
                /* insert alt allele */
                this.alleleTable.insert(outputWriter, null, alt.getBaseString());
                this.variant2altTable.insert(outputWriter, null, variant_id, new SelectStmt(this.alleleTable, "bases", alt.getBaseString()));
            }
            /* insert filters */
            for (final String filter : var.getFilters()) {
                if (filter2filterid.get(filter) == null) {
                    throw new IOException("VCF Error: filter " + filter + " is not defined in the VCF header.");
                }
                this.variant2filters.insert(outputWriter, null, variant_id, filter2filterid.get(filter));
            }
            if (!this.ignore_info) {
                for (final VepPrediction pred : vepPredictionParser.getPredictions(var)) {
                    /*
                    vepPrediction.insert(
                    outputWriter,
                    null,
                    variant_id,
                    pred.getEnsemblGene(),
                    pred.getEnsemblTranscript(),
                    pred.getEnsemblProtein(),
                    pred.getSymbol()
                    );
                    SelectStmt pred_id = new SelectStmt(vepPrediction);
                    for(SequenceOntologyTree.Term t: pred.getSOTerms())
                    {
                    String term=t.getAcn().replace(':', '_');
                    soTermTable.insert(
                    outputWriter,
                    null,
                    term,
                    t.getAcn()
                    );//for bioportal compatibility
                    SelectStmt term_id = new SelectStmt(soTermTable,"acn",term);
                    vepPrediction2so.insert(
                    outputWriter,
                    null,
                    pred_id,
                    term_id
                    );
                    }
                    */
                }
            }
            /* insert genotypes */
            for (final String sampleName : sample2sampleid.keySet()) {
                final Genotype g = var.getGenotype(sampleName);
                if (!g.isAvailable() || g.isNoCall())
                    continue;
                genotypeTable.insert(outputWriter, null, variant_id, sample2sampleid.get(sampleName), g.isCalled() ? new SelectStmt(this.alleleTable, "bases", g.getAllele(0).getBaseString()) : null, g.isCalled() ? new SelectStmt(this.alleleTable, "bases", g.getAllele(1).getBaseString()) : null, g.hasDP() ? g.getDP() : null, g.hasGQ() ? g.getGQ() : null);
            }
        }
    }
}
Aggregations