use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class VcfCompareCallersOneSample method doWork.
@Override
public int doWork(final List<String> args) {
File inputFile = null;
final List<EqualRangeVcfIterator> listChallengers = new ArrayList<>();
VariantContextWriter vcw = null;
VCFIterator in = null;
try {
in = super.openVCFIterator(oneFileOrNull(args));
VCFHeader header = in.getHeader();
if (header.getNGenotypeSamples() != 1) {
LOG.error("vcf must have only one sample");
return -1;
}
final VCFHeader h2 = new VCFHeader(header);
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
h2.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
JVarkitVersion.getInstance().addMetaData(getClass().getSimpleName(), h2);
SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input"));
return -1;
}
final Comparator<VariantContext> ctxComparator = VCFUtils.createTidPosComparator(dict);
/* load files to be challenged */
for (final File cf : this.challengerVcf) {
// do not challenge vs itself
if (inputFile != null && inputFile.equals(cf)) {
LOG.error("Ignoring challenger (self): " + cf);
continue;
}
VCFIterator cin = VCFUtils.createVCFIteratorFromFile(cf);
VCFHeader ch = cin.getHeader();
if (ch.getNGenotypeSamples() != 1) {
LOG.warning("vcf.must.have.only.one.sample");
cin.close();
continue;
}
if (!header.getSampleNamesInOrder().get(0).equals(ch.getSampleNamesInOrder().get(0))) {
LOG.warning("Ignoring " + cf + " because not the same sample.");
cin.close();
continue;
}
SAMSequenceDictionary hdict = ch.getSequenceDictionary();
if (hdict == null || !SequenceUtil.areSequenceDictionariesEqual(dict, hdict)) {
LOG.error("not.the.same.sequence.dictionaries");
return -1;
}
listChallengers.add(new EqualRangeVcfIterator(cin, ctxComparator));
}
vcw = super.openVariantContextWriter(outputFile);
vcw.writeHeader(h2);
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
VariantContext prev_ctx = null;
while (in.hasNext() && !vcw.checkError()) {
VariantContext ctx = progress.watch(in.next());
// check input order
if (prev_ctx != null && ctxComparator.compare(prev_ctx, ctx) > 0) {
LOG.error("bad sort order : got\n\t" + prev_ctx + "\nbefore\n\t" + ctx + "\n");
return -1;
}
prev_ctx = ctx;
int countInOtherFiles = 0;
for (final EqualRangeVcfIterator citer : listChallengers) {
boolean foundInThatFile = false;
final List<VariantContext> ctxChallenging = citer.next(ctx);
for (final VariantContext ctx2 : ctxChallenging) {
if (!ctx2.getReference().equals(ctx.getReference()))
continue;
boolean ok = true;
if (!this.ignoreAlternate) {
Set<Allele> myAlt = new HashSet<Allele>(ctx.getAlternateAlleles());
myAlt.removeAll(ctx2.getAlternateAlleles());
if (!myAlt.isEmpty())
ok = false;
}
if (ok) {
foundInThatFile = true;
break;
}
}
countInOtherFiles += (foundInThatFile ? 1 : 0);
}
if (countInOtherFiles >= minCountInclusive && countInOtherFiles <= maxCountInclusive) {
vcw.add(ctx);
}
}
progress.finish();
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcw);
CloserUtil.close(listChallengers);
CloserUtil.close(in);
}
}
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class KnimeVariantHelper method processVcfMulti.
/**
* process the VCF file,
*
* @param vcfIn input file name
* @param fun functional
* @return the output file name
* @throws IOException
*/
public String processVcfMulti(final String vcfIn, final Function<VariantContext, List<VariantContext>> fun) throws IOException {
this.lastVariantCount = 0;
if (vcfIn == null) {
final String msg = "Vcf Input URI/FIle is null.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
File outVcfFile = null;
File outVcfIndexFile = null;
final File STOP_FILE = new File(this.workfingDirectory, "STOP");
if (STOP_FILE.exists()) {
final String msg = "There is a stop file in " + STOP_FILE;
LOG.error(msg);
throw new IOException(msg);
}
boolean fail_flag = false;
VCFIterator iter = null;
VariantContextWriter variantContextWriter = null;
try {
IOUtil.assertDirectoryIsReadable(this.workfingDirectory);
IOUtil.assertDirectoryIsWritable(this.workfingDirectory);
if (!IOUtil.isUrl(vcfIn)) {
IOUtil.assertFileIsReadable(new File(vcfIn));
}
final String extension;
if (this.forceSuffix.equals(ForceSuffix.ForceTabix)) {
extension = ".vcf.gz";
} else if (this.forceSuffix.equals(ForceSuffix.ForceTribble)) {
extension = ".vcf";
} else if (vcfIn.endsWith(".gz")) {
extension = ".vcf.gz";
} else {
extension = ".vcf";
}
final String filename = this.createOutputFile(vcfIn, extension);
final String indexFilename;
if (extension.endsWith(".gz")) {
indexFilename = filename + FileExtensions.TRIBBLE_INDEX;
} else {
indexFilename = filename + FileExtensions.TABIX_INDEX;
}
outVcfFile = new File(filename);
outVcfIndexFile = new File(indexFilename);
LOG.info("opening " + vcfIn);
iter = VCFUtils.createVCFIterator(vcfIn);
super.init(iter.getHeader());
final VCFHeader vcfHeader2;
if (this.getExtraVcfHeaderLines().isEmpty()) {
vcfHeader2 = iter.getHeader();
} else {
vcfHeader2 = new VCFHeader(iter.getHeader());
for (final VCFHeaderLine extra : this.getExtraVcfHeaderLines()) {
vcfHeader2.addMetaDataLine(extra);
}
// clear vcf header line now they 've been added to the header.
this.getExtraVcfHeaderLines().clear();
}
final SAMSequenceDictionary dict = this.getHeader().getSequenceDictionary();
if (dict == null) {
final String msg = "There is no dictionary (##contig lines) in " + vcfIn + " but they are required.";
LOG.error(msg);
throw new IllegalArgumentException(msg);
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
progress.setLogPrefix(this.filePrefix);
LOG.info("writing " + outVcfFile + ". Emergency stop file is " + STOP_FILE);
variantContextWriter = this.variantContextWriterBuilder.setOutputFile(outVcfFile).setReferenceDictionary(dict).build();
long lastTick = System.currentTimeMillis();
variantContextWriter.writeHeader(vcfHeader2);
while (iter.hasNext()) {
final VariantContext ctx = progress.watch(iter.next());
final List<VariantContext> array = fun.apply(ctx);
if (array != null) {
for (final VariantContext ctx2 : array) {
variantContextWriter.add(ctx2);
this.lastVariantCount++;
}
}
// check STOP File
final long now = System.currentTimeMillis();
if (// 10sec
(now - lastTick) > 10 * 1000) {
lastTick = now;
if (STOP_FILE.exists()) {
LOG.warn("STOP FILE detected " + STOP_FILE + " Aborting.");
fail_flag = true;
break;
}
}
}
progress.finish();
iter.close();
iter = null;
variantContextWriter.close();
variantContextWriter = null;
return outVcfFile.getPath();
} catch (final Exception err) {
fail_flag = true;
LOG.error(err);
throw new IOException(err);
} finally {
CloserUtil.close(iter);
CloserUtil.close(variantContextWriter);
if (fail_flag) {
if (outVcfFile != null && outVcfFile.exists()) {
LOG.warn("deleting " + outVcfFile);
outVcfFile.delete();
}
if (outVcfIndexFile != null && outVcfIndexFile.exists()) {
LOG.warn("deleting " + outVcfIndexFile);
outVcfIndexFile.delete();
}
}
}
}
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class OnePassVcfLauncher method doWork.
@Override
public int doWork(final List<String> args) {
VCFIterator in = null;
VariantContextWriter vcw = null;
final String input = super.oneFileOrNull(args);
if (input != null && this.outputFile != null && !IOUtil.isUrl(input) && Paths.get(input).equals(this.outputFile)) {
LOG.error("Input == output : " + input);
return -1;
}
try {
if (beforeVcf() != 0) {
LOG.error("initialization failed");
return -1;
}
} catch (final Throwable err) {
LOG.error(err);
return -1;
}
try {
final BcfIteratorBuilder bcb = new BcfIteratorBuilder();
if (input == null) {
in = bcb.open(stdin());
} else {
in = bcb.open(input);
}
if (getLogger() != null) {
in = new VCFIter(in, getLogger());
}
vcw = this.writingVariantsDelegate.dictionary(in.getHeader()).open(this.outputFile);
final int err = doVcfToVcf(input == null ? "<stdin>" : input, in, vcw);
vcw.close();
vcw = null;
in.close();
in = null;
if (err != 0)
deleteOutputOnError();
return err;
} catch (final Throwable err) {
LOG.error(err);
if (vcw != null)
vcw.close();
deleteOutputOnError();
return -1;
} finally {
try {
afterVcf();
} catch (final Throwable err) {
LOG.error(err);
}
if (in != null)
in.close();
if (vcw != null)
vcw.close();
}
}
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class VcfEvaiAnnot method doVcfToVcf.
@Override
protected int doVcfToVcf(final String inputName, final VCFIterator iterin, final VariantContextWriter out) {
final VCFHeader header0 = iterin.getHeader();
final VCFHeader header2 = new VCFHeader(header0);
final Set<VCFHeaderLine> meta = new HashSet<>();
/* evai fields */
this.all_evai.stream().flatMap(T -> T.column2index.keySet().stream()).filter(T -> isEvaiBooleanField(T)).map(T -> new VCFInfoHeaderLine(EVAI_PFX + T, 1, VCFHeaderLineType.Integer, T)).forEach(H -> meta.add(H));
meta.add(new VCFInfoHeaderLine(EVAI_PFX + "FINAL_CLASSIFICATION", 1, VCFHeaderLineType.String, "FINAL_CLASSIFICATION"));
meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PVS1", 1, VCFHeaderLineType.Integer, "From intervar"));
for (int i = 0; i < 5; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PS" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
for (int i = 0; i < 7; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PM" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
for (int i = 0; i < 6; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "PP" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "BA1", 1, VCFHeaderLineType.Integer, "From intervar"));
for (int i = 0; i < 5; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "BS" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
for (int i = 0; i < 8; i++) meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "BP" + i, 1, VCFHeaderLineType.Integer, "From intervar"));
meta.add(new VCFInfoHeaderLine(INTERVAR_PFX + "_CLASSIFICATION", 1, VCFHeaderLineType.String, "FINAL_CLASSIFICATION"));
meta.stream().forEach(M -> header2.addMetaDataLine(M));
JVarkitVersion.getInstance().addMetaData(this, header2);
final ProgressFactory.Watcher<VariantContext> progress = ProgressFactory.newInstance().dictionary(header0).logger(LOG).build();
out.writeHeader(header2);
while (iterin.hasNext()) {
final VariantContext ctx = progress.apply(iterin.next());
if (this.ignore_filtered && ctx.isFiltered()) {
out.add(ctx);
continue;
}
final Map<String, Object> attributes = new HashMap<>();
challengeEvai(attributes, ctx);
challengeIntervar(attributes, ctx);
if (attributes.isEmpty()) {
out.add(ctx);
} else {
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
for (final String k : attributes.keySet()) vcb.attribute(k, attributes.get(k));
out.add(vcb.make());
}
}
progress.close();
return 0;
}
use of htsjdk.variant.vcf.VCFIterator in project jvarkit by lindenb.
the class VcfEnsemblReg method annotate.
private void annotate(Track track, File inf, File outf) throws IOException {
boolean contained = false;
LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
VCFIterator in = VCFUtils.createVCFIteratorFromFile(inf);
VCFHeader header = in.getHeader();
VCFInfoHeaderLine info = null;
BBFileReader bigFile = new BBFileReader(track.url.toString());
VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
if (bigFile.isBigWigFile()) {
info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
} else {
info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
}
header.addMetaDataLine(info);
w1.writeHeader(in.getHeader());
while (in.hasNext()) {
VariantContext ctx = in.next();
String chrom = ctx.getContig();
if (!chrom.startsWith("chr"))
chrom = "chr" + chrom;
if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[12])")) {
w1.add(ctx);
} else if (bigFile.isBigWigFile()) {
BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
Float wigValue = null;
while (iter != null && iter.hasNext() && wigValue == null) {
WigItem item = iter.next();
wigValue = item.getWigValue();
}
if (wigValue == null) {
w1.add(ctx);
continue;
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(track.id, wigValue);
w1.add(vcb.make());
} else {
BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
Set<String> bedValues = new HashSet<String>();
while (iter != null && iter.hasNext()) {
BedFeature item = iter.next();
String[] rest = item.getRestOfFields();
if (rest == null || rest.length != 6) {
System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
continue;
}
String color = null;
if (track.parent != null) {
if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
color = segway_17SegmentationSummaries(rest[5]);
} else if (track.parent.startsWith("ProjectedSegments")) {
color = projectedSegments(rest[5]);
} else if (track.parent.startsWith("RegBuildOverview")) {
color = regBuildOverview(rest[5]);
} else if (track.parent.startsWith("Segway_17CellSegments")) {
color = segway_17CellSegments(rest[5]);
} else {
System.err.println("Unknown parent:" + track.parent);
}
}
if (color == null)
continue;
bedValues.add(rest[0] + "|" + color);
}
if (bedValues.isEmpty()) {
w1.add(ctx);
continue;
}
StringBuilder sb = new StringBuilder();
for (String s : bedValues) {
if (sb.length() != 0)
sb.append(",");
sb.append(s);
}
VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(track.id, sb.toString());
w1.add(vcb.make());
}
}
in.close();
w1.close();
}
Aggregations