Use of htsjdk.variant.vcf.VCFIterator in the jvarkit project by lindenb: the class VcfTrap, method doVcfToVcf.
/**
 * Annotates each VCF variant with TRAP scores (http://trap-score.org/) looked up in
 * per-contig binary index files listed in a tab-delimited manifest (CONTIG\tPATH).
 *
 * Adds three INFO fields: {@code ATT} (ALT|GENE|SCORE tuples), {@code ATT_MIN} and
 * {@code ATT_MAX} (min/max score found for the variant).
 *
 * @param inputName name of the VCF input (for logging only)
 * @param iter      iterator over the input variants
 * @param out       writer receiving the (possibly annotated) variants
 * @return 0 on success, -1 on error
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iter, final VariantContextWriter out) {
	// contig name -> TRAP index file, loaded from the manifest
	final Map<String, Path> chromToFile = new HashMap<>();
	IOUtil.assertFileIsReadable(this.manifestFile);
	BufferedReader r = null;
	try {
		r = IOUtils.openPathForBufferedReading(this.manifestFile);
		// each non-blank, non-comment manifest line is "CONTIG<tab>PATH"
		r.lines().filter(L -> !L.trim().isEmpty()).filter(L -> !L.startsWith("#")).forEach(L -> {
			final int tab = L.indexOf('\t');
			if (tab == -1)
				throw new RuntimeIOException("tab missing in " + L);
			final String contig = L.substring(0, tab);
			if (chromToFile.containsKey(contig)) {
				throw new RuntimeIOException("duplicate contig " + L);
			}
			final Path indexFile = Paths.get(L.substring(tab + 1));
			IOUtil.assertFileIsReadable(indexFile);
			chromToFile.put(contig, indexFile);
		});
	} catch (final IOException err) {
		LOG.error(err);
		return -1;
	} finally {
		CloserUtil.close(r);
	}
	// resolve a VCF contig to an open IndexFile, tolerating a "chr" prefix mismatch
	final Function<String, IndexFile> getIndexFile = s -> {
		Path file = chromToFile.get(s);
		if (file == null && s.startsWith("chr"))
			file = chromToFile.get(s.substring(3));
		if (file == null && !s.startsWith("chr"))
			file = chromToFile.get("chr" + s);
		if (file == null)
			return null;
		try {
			return new IndexFile(s, file);
		} catch (final IOException err) {
			throw new RuntimeIOException(err);
		}
	};
	// index file for the contig currently being scanned; input is assumed contig-sorted
	IndexFile current = null;
	final String ATT_MIN = this.ATT + "_MIN";
	final String ATT_MAX = this.ATT + "_MAX";
	// contigs we already failed to resolve, to warn only once per contig
	final Set<String> contigs_not_found = new HashSet<>();
	// TRAP records are ordered by start position; contigs must already match
	final Comparator<TrapRecord> comparator = (A, B) -> {
		if (!A.getContig().equals(B.getContig()))
			throw new IllegalStateException("not the same contigs ???");
		return Integer.compare(A.getStart(), B.getStart());
	};
	final VCFHeader header = new VCFHeader(iter.getHeader());
	final VCFHeader header2 = new VCFHeader(header);
	header2.addMetaDataLine(new VCFInfoHeaderLine(this.ATT, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "TRAP Score:((ALT|GENE|SCORE) in Trap Database http://trap-score.org/"));
	header2.addMetaDataLine(new VCFInfoHeaderLine(ATT_MIN, 1, VCFHeaderLineType.Float, "Min Score in Trap Database http://trap-score.org/"));
	header2.addMetaDataLine(new VCFInfoHeaderLine(ATT_MAX, 1, VCFHeaderLineType.Float, "Max Score in Trap Database http://trap-score.org/"));
	JVarkitVersion.getInstance().addMetaData(this, header2);
	final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(iter.getHeader()).logger(LOG).build();
	out.writeHeader(header2);
	while (iter.hasNext()) {
		final VariantContext var = progress.apply(iter.next());
		// optionally pass FILTERed variants through untouched
		if (this.ignore_filtered && var.isFiltered()) {
			out.add(var);
			continue;
		}
		// switch index file when the contig changes
		if (current == null || !current.contig.equals(var.getContig())) {
			if (current != null) {
				CloserUtil.close(current);
				current = null;
			}
			if (contigs_not_found.contains(var.getContig())) {
				out.add(var);
				continue;
			}
			current = getIndexFile.apply(var.getContig());
		}
		if (current == null) {
			LOG.warn("Not indexed in trap " + var.getContig());
			contigs_not_found.add(var.getContig());
			out.add(var);
			continue;
		}
		final Set<String> annotations = new HashSet<>();
		// one-element arrays so the forEach lambda below can mutate them
		final Float[] min_score = new Float[] { null };
		final Float[] max_score = new Float[] { null };
		// binary-search the index with a probe record carrying only the variant's coordinates
		Algorithms.equal_range_stream(current, 0, current.size(), new TrapRecord() {
			@Override
			public int getStart() {
				return var.getStart();
			}
			@Override
			public int getEnd() {
				return var.getEnd();
			}
			@Override
			public String getContig() {
				return var.getContig();
			}
			@Override
			public String getChr() {
				return getContig();
			}
			@Override
			public float getScore() {
				return 0f;
			}
			@Override
			public char getRef() {
				return '\0';
			}
			@Override
			public String getGene() {
				return "";
			}
			@Override
			public char getAlt() {
				return '\0';
			}
		}, comparator, Function.identity()).filter(R -> var.getReference().equals(Allele.create((byte) R.getRef(), true))).filter(R -> var.getAlternateAlleles().stream().anyMatch(A -> A.equals(Allele.create((byte) R.getAlt(), false)))).forEach(R -> {
			annotations.add(String.join("|", String.valueOf(R.getAlt()), R.getGene(), String.format("%." + TrapIndexer.SCORE_STRLEN + "f", R.getScore())));
			if (min_score[0] == null || min_score[0].compareTo(R.getScore()) > 0) {
				min_score[0] = R.getScore();
			}
			if (max_score[0] == null || max_score[0].compareTo(R.getScore()) < 0) {
				max_score[0] = R.getScore();
			}
		});
		if (annotations.isEmpty()) {
			out.add(var);
			continue;
		}
		final VariantContextBuilder vcb = new VariantContextBuilder(var);
		vcb.attribute(this.ATT, new ArrayList<>(annotations));
		vcb.attribute(ATT_MIN, min_score[0]);
		vcb.attribute(ATT_MAX, max_score[0]);
		// FIX: write the annotated variant; the original code wrote `var`,
		// silently discarding everything that was just added to `vcb`.
		out.add(vcb.make());
	}
	out.close();
	progress.close();
	CloserUtil.close(current);
	return 0;
}
Use of htsjdk.variant.vcf.VCFIterator in the jvarkit project by lindenb: the class VcfToIntervals, method doWork.
/**
 * Splits a (coordinate-sorted) VCF into intervals, either by a fixed number of
 * variants per interval or by a maximum genomic span per interval, then extends
 * each interval while the next variant is closer than {@code min_distance}.
 * Output is one line per interval: CONTIG, START, END, N_VARIANTS, LENGTH —
 * either as a SAM-header-prefixed interval_list (1-based) or as BED (0-based start).
 *
 * @param args command-line arguments; at most one VCF path, else stdin
 * @return 0 on success, -1 on error
 */
@Override
public int doWork(final List<String> args) {
	// exactly one of the two splitting criteria must be set
	if (n_variants_per_interval >= 0 && distance_per_interval >= 0) {
		// FIX: these are error paths (the method returns -1) — log at error
		// level, consistent with the other commands in this project.
		LOG.error("n-variants per interval and distance both defined");
		return -1;
	} else if (n_variants_per_interval < 0 && distance_per_interval < 0) {
		LOG.error("n-variants per interval or distance must be defined");
		return -1;
	}
	try {
		try (VCFIterator iter = super.openVCFIterator(oneFileOrNull(args))) {
			final VCFHeader header = iter.getHeader();
			final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
			try (PrintWriter pw = super.openPathOrStdoutAsPrintWriter(this.outputFile)) {
				// interval_list output starts with a SAM header; one read-group per sample
				if (!this.force_bed_output) {
					final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
					final SAMFileHeader samHeader = new SAMFileHeader(dict);
					samHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
					for (final String sn : header.getSampleNamesInOrder()) {
						final SAMReadGroupRecord rg = new SAMReadGroupRecord(sn);
						rg.setSample(sn);
						samHeader.addReadGroup(rg);
					}
					JVarkitVersion.getInstance().addMetaData(this, samHeader);
					codec.encode(pw, samHeader);
				}
				while (iter.hasNext()) {
					// `first`..`last` delimit the current interval
					final VariantContext first = iter.next();
					VariantContext last = first;
					long n_variants = 1;
					if (this.n_variants_per_interval > 0L) {
						// criterion 1: cap the number of variants per interval
						while (iter.hasNext() && n_variants < this.n_variants_per_interval) {
							if (!first.contigsMatch(iter.peek())) {
								break;
							}
							// consumme
							last = iter.next();
							n_variants++;
						}
					} else {
						// criterion 2: cap the genomic span of the interval
						while (iter.hasNext()) {
							final VariantContext curr = iter.peek();
							if (!first.contigsMatch(curr)) {
								break;
							}
							if (CoordMath.getLength(first.getStart(), curr.getEnd()) > this.distance_per_interval) {
								break;
							}
							// consumme
							last = iter.next();
							n_variants++;
						}
					}
					// next variant is just too close than the last one
					while (this.min_distance >= 0 && iter.hasNext()) {
						final VariantContext curr = iter.peek();
						if (!last.withinDistanceOf(curr, this.min_distance))
							break;
						// consumme
						last = iter.next();
						n_variants++;
					}
					pw.print(first.getContig());
					pw.print("\t");
					// BED is half-open 0-based: shift the start by one
					pw.print(first.getStart() - (this.force_bed_output ? 1 : 0));
					pw.print("\t");
					pw.print(last.getEnd());
					pw.print("\t");
					pw.print(n_variants);
					pw.print("\t");
					pw.print(CoordMath.getLength(first.getStart(), last.getEnd()));
					pw.println();
				}
				// end while iter
				pw.flush();
			}
			// end out
		}
		// end vcf iterator
		return 0;
	} catch (final Throwable err) {
		LOG.error(err);
		return -1;
	}
}
Use of htsjdk.variant.vcf.VCFIterator in the jvarkit project by lindenb: the class Launcher, method doVcfToVcf.
/**
 * Convenience wrapper: opens the VCF input (or stdin) and the output writer,
 * then delegates to {@link #doVcfToVcf(String, VCFIterator, VariantContextWriter)}.
 * Both resources are always closed, and any exception is logged.
 *
 * @param inputNameOrNull VCF path/URI, or null for stdin
 * @param outorNull       output file, or null for stdout
 * @return the delegate's exit code, or -1 on error
 */
protected int doVcfToVcf(final String inputNameOrNull, final File outorNull) {
	// try-with-resources closes the writer first, then the iterator,
	// whether the delegate returns normally or throws
	try (final VCFIterator iterin = openVCFIterator(inputNameOrNull);
			final VariantContextWriter w = openVariantContextWriter(outorNull)) {
		final String displayName = inputNameOrNull == null ? "<STDIN>" : inputNameOrNull;
		return doVcfToVcf(displayName, iterin, w);
	} catch (final Exception err) {
		LOG.error(err);
		return -1;
	}
}
Use of htsjdk.variant.vcf.VCFIterator in the jvarkit project by lindenb: the class VcfLiftOver, method doVcfToVcf.
/**
 * Lifts each variant over to the target build with htsjdk's LiftOver, validating
 * contigs against the target dictionary and (optionally) allele sequences against
 * the target reference. Failed variants go to an optional side-channel VCF with a
 * reason in {@code failedinfoTag}; successful ones get their original position in
 * {@code infoTag}, with alleles reverse-complemented on negative-strand hits.
 */
@Override
protected int doVcfToVcf(String inputName, VCFIterator in, VariantContextWriter out) {
	VariantContextWriter failed = null;
	// lazily-loaded target-reference sequence, reused while the lifted contig is unchanged
	GenomicSequence genomicSequence = null;
	try {
		final VCFHeader inputHeader = in.getHeader();
		// drop the INFO header lines the user asked to remove; keep everything else
		final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
			if (!(V instanceof VCFInfoHeaderLine))
				return true;
			final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
			if (removeInfo.contains(vih.getID()))
				return false;
			return true;
		}).collect(Collectors.toSet());
		// optional side-channel VCF receiving the variants that could not be lifted
		if (this.failedFile != null) {
			final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
			header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
			header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
			header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
			failed = super.writingVariantsDelegate.dictionary(header2).open(failedFile);
			failed.writeHeader(header2);
		}
		// main output header carries the TARGET build's dictionary
		final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
		header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
		JVarkitVersion.getInstance().addMetaData(this, header3);
		header3.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
		header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
		out.writeHeader(header3);
		while (in.hasNext()) {
			VariantContext ctx = in.next();
			// strip the user-removed INFO fields from the variant itself
			if (!this.removeInfo.isEmpty()) {
				VariantContextBuilder vcb = new VariantContextBuilder(ctx);
				for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
				ctx = vcb.make();
			}
			if (ctx.isIndel() && this.ignoreIndels) {
				if (failed != null)
					failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
				continue;
			}
			// NOTE(review): Math.min(0, min-allele-length) is always 0 for real alleles,
			// so the min-match ratio is always 0.0 here — looks like Math.max was
			// intended; TODO confirm against LiftOver.setLiftOverMinMatch semantics.
			if (adaptivematch) {
				double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
				double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
				this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
			}
			final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
			false, String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
			if (lifted == null) {
				// no mapping in the chain file
				if (failed != null)
					failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
			} else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
				// mapped to a contig absent from the target reference
				if (failed != null)
					failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
			} else {
				boolean alleleAreValidatedVsRef = true;
				// part of the code was copied from picard/liftovervcf
				// old allele -> reverse-complemented allele, used to fix genotypes below
				final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
				final List<Allele> alleles = new ArrayList<Allele>();
				for (final Allele oldAllele : ctx.getAlleles()) {
					final Allele fixedAllele;
					if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
						// symbolic/no-call/spanning-deletion alleles pass through unchanged
						alleles.add(oldAllele);
						continue;
					} else if (lifted.isPositiveStrand()) {
						fixedAllele = oldAllele;
						alleles.add(oldAllele);
					} else {
						// negative-strand hit: reverse-complement the allele
						fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
						alleles.add(fixedAllele);
						reverseComplementAlleleMap.put(oldAllele, fixedAllele);
					}
					// NOTE(review): this compares EVERY allele (including ALTs) against the
					// target reference sequence, so any true ALT base fails the check —
					// presumably only the REF allele was meant; TODO confirm intent.
					if (this.checkAlleleSequence) {
						if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
							genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
						}
						final String alleleStr = fixedAllele.getBaseString();
						int x = 0;
						// compare base-by-base, case-insensitively, stopping at the contig end
						while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
							final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
							if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
								alleleAreValidatedVsRef = false;
								break;
							}
							++x;
						}
						// also fail when the allele ran past the end of the contig
						if (x != alleleStr.length()) {
							alleleAreValidatedVsRef = false;
							break;
						}
					}
				}
				if (!alleleAreValidatedVsRef) {
					if (failed != null)
						failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
					continue;
				}
				// liftover must preserve the variant's length
				if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
					if (failed != null)
						failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
					continue;
				}
				// rebuild the variant at the lifted coordinates
				final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
				vcb.id(ctx.getID());
				vcb.attributes(ctx.getAttributes());
				vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
				vcb.filters(ctx.getFilters());
				vcb.log10PError(ctx.getLog10PError());
				if (lifted.getStart() != lifted.getEnd()) {
					vcb.attribute(VCFConstants.END_KEY, lifted.getEnd());
				}
				// remap genotype alleles through the reverse-complement map
				final GenotypesContext genotypeContext = ctx.getGenotypes();
				final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
				for (final Genotype genotype : genotypeContext) {
					final List<Allele> fixedAlleles = new ArrayList<Allele>();
					for (final Allele allele : genotype.getAlleles()) {
						final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
						fixedAlleles.add(fixedAllele);
					}
					fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
				}
				vcb.genotypes(fixedGenotypes);
				out.add(vcb.make());
			}
		}
		if (failed != null) {
			failed.close();
			failed = null;
		}
		return 0;
	} catch (final Throwable err) {
		LOG.error(err);
		return -1;
	} finally {
		CloserUtil.close(failed);
	}
}
Use of htsjdk.variant.vcf.VCFIterator in the jvarkit project by lindenb: the class VcfFilterByLiftOver, method doVcfToVcf.
/**
 * FILTERs (never drops) variants according to a liftover to another build:
 * LIFTOVER_FAILED when the chain has no mapping, LIFTOVER_OTHER_CTG when the
 * variant maps to a different contig, LIFTOVER_DISTANT when two nearby variants
 * become unusually distant after liftover. Optionally copies FILTERs from a
 * second VCF queried at the lifted position.
 *
 * @param inputName name of the VCF input (for logging only)
 * @param in        iterator over the input variants (assumed contig-sorted)
 * @param out       writer receiving every input variant, possibly FILTERed
 * @return 0 on success
 */
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator in, final VariantContextWriter out) {
	// converter mapping lifted contig names to the second VCF's dictionary, if any
	final ContigNameConverter anotherVcfCtgConverter;
	if (this.anotherVcfReader != null) {
		final SAMSequenceDictionary dict = this.anotherVcfReader.getHeader().getSequenceDictionary();
		if (dict != null) {
			anotherVcfCtgConverter = ContigNameConverter.fromOneDictionary(dict);
		} else {
			anotherVcfCtgConverter = ContigNameConverter.getIdentity();
		}
	} else {
		anotherVcfCtgConverter = ContigNameConverter.getIdentity();
	}
	final LiftOver liftOver = new LiftOver(this.liftOverFile);
	liftOver.setLiftOverMinMatch(this.userMinMatch);
	final VCFHeader header = in.getHeader();
	final SAMSequenceDictionary dict = header.getSequenceDictionary();
	if (!this.disableValidation && dict != null && !dict.isEmpty()) {
		liftOver.validateToSequences(dict);
	}
	// declare the FILTER/INFO lines we may set
	final VCFHeader header2 = new VCFHeader(header);
	final VCFFilterHeaderLine filterLiftOverFailed = new VCFFilterHeaderLine("LIFTOVER_FAILED", "liftover failed " + this.liftOverFile);
	header2.addMetaDataLine(filterLiftOverFailed);
	final VCFFilterHeaderLine filterNoSameContig = new VCFFilterHeaderLine("LIFTOVER_OTHER_CTG", "Variant is mapped to another contig after liftover with " + this.liftOverFile);
	header2.addMetaDataLine(filterNoSameContig);
	final VCFInfoHeaderLine infoLiftOverPos = new VCFInfoHeaderLine("LIFTOVER_LOC", 1, VCFHeaderLineType.String, "Position of the variant liftover-ed with " + this.liftOverFile);
	header2.addMetaDataLine(infoLiftOverPos);
	final VCFFilterHeaderLine filterDistantFromPrev = new VCFFilterHeaderLine("LIFTOVER_DISTANT", "After liftover the distance (< " + min_distance + ") with the previous variant is unusual( > " + max_distance + ") after liftover with " + this.liftOverFile);
	header2.addMetaDataLine(filterDistantFromPrev);
	/* transfert filters to new header */
	if (this.anotherVcfReader != null) {
		this.anotherVcfReader.getHeader().getFilterLines().forEach(F -> header2.addMetaDataLine(F));
	}
	JVarkitVersion.getInstance().addMetaData(this, header2);
	out.writeHeader(header2);
	// previous variant and its lifted position, reset on every contig change
	Locatable prevCtx = null;
	Locatable prevLifted = null;
	final Function<Locatable, String> interval2str = R -> R.getContig() + "|" + R.getStart() + "|" + R.getEnd();
	while (in.hasNext()) {
		final VariantContext ctx = in.next();
		if (prevCtx != null && !prevCtx.getContig().equals(ctx.getContig())) {
			prevCtx = null;
			prevLifted = null;
		}
		final Interval lifted = liftOver.liftOver(new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), // negative strand
		false, interval2str.apply(ctx)));
		// lifover failed
		if (lifted == null) {
			final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
			vcb.filter(filterLiftOverFailed.getID());
			out.add(vcb.make());
		} else // another contig
		// FIX: dropped the redundant `lifted != null` tests in the two branches
		// below — both are only reachable once the `lifted == null` branch failed.
		if (!sameContig(lifted, ctx)) {
			final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
			vcb.filter(filterNoSameContig.getID());
			vcb.attribute(infoLiftOverPos.getID(), interval2str.apply(lifted));
			out.add(vcb.make());
		} else // strange distance
		if (prevCtx != null && prevLifted != null && prevCtx.getContig().equals(ctx.getContig()) && sameContig(prevLifted, lifted) && distance(prevCtx, ctx) < this.min_distance && distance(prevLifted, lifted) > this.max_distance) {
			final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
			vcb.filter(filterDistantFromPrev.getID());
			vcb.attribute(infoLiftOverPos.getID(), interval2str.apply(lifted));
			out.add(vcb.make());
		} else // filtered in anotherVcf
		if (this.anotherVcfReader != null) {
			// copy FILTERs from the first same-length filtered variant at the lifted locus
			final Set<String> found_filtered = new HashSet<>();
			final String ctg2 = anotherVcfCtgConverter.apply(lifted.getContig());
			if (!StringUtils.isBlank(ctg2)) {
				try (CloseableIterator<VariantContext> iter2 = this.anotherVcfReader.query(ctg2, lifted.getStart(), lifted.getEnd())) {
					while (iter2.hasNext()) {
						final VariantContext ctx2 = iter2.next();
						if (!ctx2.isFiltered() || ctx2.getLengthOnReference() != ctx.getLengthOnReference())
							continue;
						found_filtered.addAll(ctx2.getFilters());
						break;
					}
				}
			}
			if (found_filtered.isEmpty()) {
				out.add(ctx);
			} else {
				// add previous
				found_filtered.addAll(ctx.getFilters());
				final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
				vcb.filters(found_filtered);
				out.add(vcb.make());
			}
		} else {
			out.add(ctx);
		}
		prevCtx = ctx;
		prevLifted = lifted;
	}
	return 0;
}
Aggregations