Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
The class VcfGeneOntology, method executeThings:
public int executeThings(List<String> args) {
    VcfIterator vcfIn = null;
    try {
        vcfIn = super.openVcfIterator(oneFileOrNull(args));
        this.filterVcfIterator(vcfIn);
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcfIn);
    }
}
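The open/iterate/close pattern used by executeThings can be exercised outside a tool class. Below is a minimal sketch, assuming VCFUtils lives in the same com.github.lindenb.jvarkit.util.vcf package as VcfIterator and that CloserUtil comes from htsjdk; the CountVariants class and countVariants method are illustrative names, not part of jvarkit.

import java.io.File;
import com.github.lindenb.jvarkit.util.vcf.VCFUtils;
import com.github.lindenb.jvarkit.util.vcf.VcfIterator;
import htsjdk.samtools.util.CloserUtil;

public class CountVariants {
    // Hypothetical helper: count the records of a VCF file through a VcfIterator.
    static long countVariants(final File vcfFile) throws Exception {
        VcfIterator in = null;
        long n = 0;
        try {
            in = VCFUtils.createVcfIteratorFromFile(vcfFile);
            in.getHeader(); // force the header to be parsed before reading records
            while (in.hasNext()) {
                in.next();
                ++n;
            }
            return n;
        } finally {
            CloserUtil.close(in); // always release the underlying stream, as in executeThings
        }
    }
}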
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
The class VcfCompareCallersOneSample, method doWork:
@Override
public int doWork(List<String> args) {
    File inputFile = null;
    List<EqualRangeVcfIterator> listChallengers = new ArrayList<>();
    VariantContextWriter vcw = null;
    VcfIterator in = null;
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        VCFHeader header = in.getHeader();
        if (header.getNGenotypeSamples() != 1) {
            LOG.error("vcf.must.have.only.one.sample");
            return -1;
        }
        VCFHeader h2 = new VCFHeader(header);
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            LOG.error("no.dict.in.vcf");
            return -1;
        }
        Comparator<VariantContext> ctxComparator = VCFUtils.createTidPosComparator(dict);
        /* load files to be challenged */
        for (File cf : this.challengerVcf) {
            // do not challenge vs itself
            if (inputFile != null && inputFile.equals(cf)) {
                LOG.error("Ignoring challenger (self): " + cf);
                continue;
            }
            VcfIterator cin = VCFUtils.createVcfIteratorFromFile(cf);
            VCFHeader ch = cin.getHeader();
            if (ch.getNGenotypeSamples() != 1) {
                LOG.warning("vcf.must.have.only.one.sample");
                cin.close();
                continue;
            }
            if (!header.getSampleNamesInOrder().get(0).equals(ch.getSampleNamesInOrder().get(0))) {
                LOG.warning("Ignoring " + cf + " because not the same sample.");
                cin.close();
                continue;
            }
            SAMSequenceDictionary hdict = ch.getSequenceDictionary();
            if (hdict == null || !SequenceUtil.areSequenceDictionariesEqual(dict, hdict)) {
                LOG.error("not.the.same.sequence.dictionaries");
                return -1;
            }
            listChallengers.add(new EqualRangeVcfIterator(cin, ctxComparator));
        }
        vcw = super.openVariantContextWriter(outputFile);
        vcw.writeHeader(h2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VariantContext prev_ctx = null;
        while (in.hasNext() && !vcw.checkError()) {
            VariantContext ctx = progress.watch(in.next());
            // check input order
            if (prev_ctx != null && ctxComparator.compare(prev_ctx, ctx) > 0) {
                LOG.error("bad sort order : got\n\t" + prev_ctx + "\nbefore\n\t" + ctx + "\n");
                return -1;
            }
            prev_ctx = ctx;
            int countInOtherFiles = 0;
            for (EqualRangeVcfIterator citer : listChallengers) {
                boolean foundInThatFile = false;
                List<VariantContext> ctxChallenging = citer.next(ctx);
                for (VariantContext ctx2 : ctxChallenging) {
                    if (!ctx2.getReference().equals(ctx.getReference()))
                        continue;
                    boolean ok = true;
                    if (!this.ignoreAlternate) {
                        Set<Allele> myAlt = new HashSet<Allele>(ctx.getAlternateAlleles());
                        myAlt.removeAll(ctx2.getAlternateAlleles());
                        if (!myAlt.isEmpty())
                            ok = false;
                    }
                    if (ok) {
                        foundInThatFile = true;
                        break;
                    }
                }
                countInOtherFiles += (foundInThatFile ? 1 : 0);
            }
            if (countInOtherFiles >= minCountInclusive && countInOtherFiles <= maxCountInclusive) {
                vcw.add(ctx);
            }
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcw);
        CloserUtil.close(listChallengers);
        CloserUtil.close(in);
    }
}
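The per-variant test above reduces to one rule: a challenger record matches when its reference allele equals that of the input variant and, unless alternate alleles are ignored, every alternate allele of the input is also present in the challenger. A minimal sketch of that predicate, assuming only the htsjdk VariantContext/Allele API; the VariantMatch class and sameCall method are illustrative names.

import java.util.HashSet;
import java.util.Set;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;

final class VariantMatch {
    // Hypothetical predicate mirroring the comparison performed in doWork above.
    static boolean sameCall(final VariantContext ctx, final VariantContext challenger, final boolean ignoreAlternate) {
        // the reference alleles must be identical
        if (!challenger.getReference().equals(ctx.getReference())) return false;
        if (ignoreAlternate) return true;
        // every ALT allele of ctx must also be an ALT allele of the challenger
        final Set<Allele> myAlt = new HashSet<>(ctx.getAlternateAlleles());
        myAlt.removeAll(challenger.getAlternateAlleles());
        return myAlt.isEmpty();
    }
}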
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
The class VcfIn, method doWork:
@Override
public int doWork(final List<String> args) {
    if (!this.filterIn.isEmpty() && !this.filterOut.isEmpty()) {
        LOG.error("Option filterIn/filterOut both defined.");
        return -1;
    }
    if (this.inverse && (!this.filterIn.isEmpty() || !this.filterOut.isEmpty())) {
        LOG.error("Option inverse cannot be used when option filterIn/filterOut is defined.");
        return -1;
    }
    String databaseVcfUri;
    String userVcfUri;
    if (args.size() == 1) {
        databaseVcfUri = args.get(0);
        userVcfUri = null;
    } else if (args.size() == 2) {
        databaseVcfUri = args.get(0);
        userVcfUri = args.get(1);
    } else {
        LOG.error("illegal number of arguments");
        return -1;
    }
    VariantContextWriter w = null;
    VcfIterator in = null;
    try {
        in = (userVcfUri == null ? VCFUtils.createVcfIteratorFromInputStream(stdin()) : VCFUtils.createVcfIterator(userVcfUri));
        w = super.openVariantContextWriter(outputFile);
        if (this.databaseIsIndexed) {
            return this.scanUsingTabix(w, databaseVcfUri, in);
        } else {
            return this.scanFileSorted(w, databaseVcfUri, in);
        }
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(w);
    }
}
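The input-selection idiom (one argument: the database VCF, with the user VCF read from standard input; two arguments: database then user VCF) can be isolated into a small helper. A minimal sketch, assuming the VCFUtils factory methods behave as they are used above; VcfInputs and openUserVcf are illustrative names.

import java.io.InputStream;
import com.github.lindenb.jvarkit.util.vcf.VCFUtils;
import com.github.lindenb.jvarkit.util.vcf.VcfIterator;

final class VcfInputs {
    // Hypothetical helper: open the user VCF from a URI, or from stdin when no URI was given.
    static VcfIterator openUserVcf(final String userVcfUri, final InputStream stdin) throws Exception {
        return userVcfUri == null
                ? VCFUtils.createVcfIteratorFromInputStream(stdin)
                : VCFUtils.createVcfIterator(userVcfUri);
    }
}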
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
The class FindCorruptedFiles, method testVcf:
private void testVcf(File f, InputStream in) throws IOException, TribbleException {
    long n = 0;
    VcfIterator iter = new VcfIteratorImpl(in);
    iter.getHeader();
    while (iter.hasNext() && (NUM < 0 || n < NUM)) {
        iter.next();
        ++n;
    }
    if (n == 0) {
        emptyFile(f);
    }
    iter.close();
}
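testVcf expects an already opened stream, so a caller has to deal with plain versus gzipped VCFs itself. A minimal sketch of such a caller, assuming java.util.zip and htsjdk's CloserUtil; detecting compression by the .gz extension is an assumption here, not something taken from FindCorruptedFiles.

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import htsjdk.samtools.util.CloserUtil;

final class VcfProbe {
    // Hypothetical caller: open the file (gunzipping *.gz) and hand the stream to a testVcf-like check.
    void probe(final File f) throws IOException {
        InputStream in = null;
        try {
            in = new FileInputStream(f);
            if (f.getName().endsWith(".gz")) {
                in = new GZIPInputStream(in); // wrap compressed input
            }
            // testVcf(f, in); // would scan the first records and flag an empty file
        } finally {
            CloserUtil.close(in);
        }
    }
}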
Use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
The class VcfLiftOver, method doVcfToVcf:
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
    VariantContextWriter failed = null;
    GenomicSequence genomicSequence = null;
    try {
        final VCFHeader inputHeader = in.getHeader();
        final Set<VCFHeaderLine> headerLines = inputHeader.getMetaDataInInputOrder().stream().filter(V -> {
            if (!(V instanceof VCFInfoHeaderLine))
                return true;
            final VCFInfoHeaderLine vih = VCFInfoHeaderLine.class.cast(V);
            if (removeInfo.contains(vih.getID()))
                return false;
            return true;
        }).collect(Collectors.toSet());
        if (this.failedFile != null) {
            final VCFHeader header2 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
            header2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
            header2.addMetaDataLine(new VCFInfoHeaderLine(this.failedinfoTag, 1, VCFHeaderLineType.String, "Why the liftOver failed."));
            failed = super.openVariantContextWriter(failedFile);
            failed.writeHeader(header2);
        }
        final VCFHeader header3 = new VCFHeader(headerLines, inputHeader.getSampleNamesInOrder());
        header3.setSequenceDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header3.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header3.addMetaDataLine(new VCFInfoHeaderLine(this.infoTag, 1, VCFHeaderLineType.String, "Chromosome|Position before liftOver."));
        out.writeHeader(header3);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(in.getHeader());
        while (in.hasNext()) {
            VariantContext ctx = progress.watch(in.next());
            if (!this.removeInfo.isEmpty()) {
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                for (final String tag : this.removeInfo) vcb.rmAttribute(tag);
                ctx = vcb.make();
            }
            if (ctx.isIndel() && this.ignoreIndels) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "Indel").make());
                continue;
            }
            if (adaptivematch) {
                double minAlleleLength = Math.min(0, ctx.getAlleles().stream().mapToInt(A -> A.length()).min().orElse(0));
                double maxAlleleLength = Math.max(1, ctx.getAlleles().stream().mapToInt(A -> A.length()).max().orElse(1));
                this.liftOver.setLiftOverMinMatch(minAlleleLength / maxAlleleLength);
            }
            final Interval lifted = liftOver.liftOver(new Interval(
                    ctx.getContig(), ctx.getStart(), ctx.getEnd(),
                    false, // negative strand
                    String.join("|", ctx.getContig(), String.valueOf(ctx.getStart()), ctx.getReference().toString())));
            if (lifted == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "LiftOverFailed").make());
            } else if (this.indexedFastaSequenceFile.getSequenceDictionary().getSequence(lifted.getContig()) == null) {
                if (failed != null)
                    failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "ContigMissingDictionary|" + lifted.getContig()).make());
            } else {
                boolean alleleAreValidatedVsRef = true;
                // part of the code was copied from picard/liftovervcf
                final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<>();
                final List<Allele> alleles = new ArrayList<Allele>();
                for (final Allele oldAllele : ctx.getAlleles()) {
                    final Allele fixedAllele;
                    if (oldAllele.isSymbolic() || oldAllele.isNoCall() || oldAllele.equals(Allele.SPAN_DEL)) {
                        alleles.add(oldAllele);
                        continue;
                    } else if (lifted.isPositiveStrand()) {
                        fixedAllele = oldAllele;
                        alleles.add(oldAllele);
                    } else {
                        fixedAllele = Allele.create(SequenceUtil.reverseComplement(oldAllele.getBaseString()), oldAllele.isReference());
                        alleles.add(fixedAllele);
                        reverseComplementAlleleMap.put(oldAllele, fixedAllele);
                    }
                    if (this.checkAlleleSequence) {
                        if (genomicSequence == null || !genomicSequence.getChrom().equals(lifted.getContig())) {
                            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, lifted.getContig());
                        }
                        final String alleleStr = fixedAllele.getBaseString();
                        int x = 0;
                        while (x < alleleStr.length() && lifted.getStart() - 1 + x < genomicSequence.length()) {
                            final char refChar = genomicSequence.charAt(lifted.getStart() - 1 + x);
                            if (Character.toLowerCase(refChar) != Character.toLowerCase(alleleStr.charAt(x))) {
                                alleleAreValidatedVsRef = false;
                                break;
                            }
                            ++x;
                        }
                        if (x != alleleStr.length()) {
                            alleleAreValidatedVsRef = false;
                            break;
                        }
                    }
                }
                if (!alleleAreValidatedVsRef) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleMismatchRef").make());
                    continue;
                }
                if (lifted.getEnd() - lifted.getStart() != ctx.getEnd() - ctx.getStart()) {
                    if (failed != null)
                        failed.add(new VariantContextBuilder(ctx).attribute(this.failedinfoTag, "AlleleBadLength|" + lifted.length()).make());
                    continue;
                }
                final VariantContextBuilder vcb = new VariantContextBuilder(ctx.getSource(), lifted.getContig(), lifted.getStart(), lifted.getEnd(), alleles);
                vcb.id(ctx.getID());
                vcb.attributes(ctx.getAttributes());
                vcb.attribute(this.infoTag, ctx.getContig() + "|" + ctx.getStart() + "|" + ctx.getReference().getDisplayString());
                vcb.filters(ctx.getFilters());
                vcb.log10PError(ctx.getLog10PError());
                final GenotypesContext genotypeContext = ctx.getGenotypes();
                final GenotypesContext fixedGenotypes = GenotypesContext.create(genotypeContext.size());
                for (final Genotype genotype : genotypeContext) {
                    final List<Allele> fixedAlleles = new ArrayList<Allele>();
                    for (final Allele allele : genotype.getAlleles()) {
                        final Allele fixedAllele = reverseComplementAlleleMap.containsKey(allele) ? reverseComplementAlleleMap.get(allele) : allele;
                        fixedAlleles.add(fixedAllele);
                    }
                    fixedGenotypes.add(new GenotypeBuilder(genotype).alleles(fixedAlleles).make());
                }
                vcb.genotypes(fixedGenotypes);
                out.add(vcb.make());
            }
        }
        if (failed != null) {
            failed.close();
            failed = null;
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(failed);
    }
}
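The negative-strand branch above hinges on one transformation: when the lifted interval falls on the reverse strand, each concrete allele is replaced by its reverse complement and recorded in a map so the genotypes can be rewritten consistently. A minimal sketch of that step alone, assuming htsjdk's Allele and SequenceUtil; LiftOverAlleles and reverseComplementAllele are illustrative names.

import htsjdk.samtools.util.SequenceUtil;
import htsjdk.variant.variantcontext.Allele;

final class LiftOverAlleles {
    // Hypothetical helper: reverse-complement a concrete allele; symbolic, no-call
    // and spanning-deletion alleles are returned unchanged, as in doVcfToVcf above.
    static Allele reverseComplementAllele(final Allele a) {
        if (a.isSymbolic() || a.isNoCall() || a.equals(Allele.SPAN_DEL)) {
            return a;
        }
        return Allele.create(SequenceUtil.reverseComplement(a.getBaseString()), a.isReference());
    }
}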