use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class Biostar175929 method doWork.
/**
 * Streams the input VCF, buffers consecutive variants that share a contig and,
 * each time the contig changes (or the input ends), passes the buffered group
 * to {@code recursive(...)} along with a {@code GenomicSequence} built for that contig.
 *
 * @param args command-line arguments; forwarded to {@code oneFileOrNull} to pick the input
 * @return {@code RETURN_OK} on success, {@code -1} when no fasta reference is set
 *         or when any exception is raised while processing
 */
@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("fasta reference was not defined.");
        return -1;
    }
    IndexedFastaSequenceFile reference = null;
    VcfIterator iter = null;
    try {
        reference = new IndexedFastaSequenceFile(this.faidx);
        iter = super.openVcfIterator(oneFileOrNull(args));
        this.pw = openFileOrStdoutAsPrintWriter(this.outputFile);

        // variants seen so far on the contig currently being collected
        final List<VariantContext> buffer = new ArrayList<>();
        boolean exhausted = false;
        while (!exhausted) {
            // a null 'current' is the end-of-input sentinel
            final VariantContext current = iter.hasNext() ? iter.next() : null;
            exhausted = (current == null);

            final boolean contigChanged = !exhausted
                    && !buffer.isEmpty()
                    && !current.getContig().equals(buffer.get(0).getContig());

            // flush the pending group before starting a new contig or stopping
            if ((exhausted || contigChanged) && !buffer.isEmpty()) {
                final String contig = buffer.get(0).getContig();
                LOG.info("chrom:" + contig + " N=" + buffer.size());
                final GenomicSequence genomic = new GenomicSequence(reference, contig);
                final StringBuilder title = new StringBuilder();
                final StringBuilder sequence = new StringBuilder();
                recursive(genomic, buffer, 0, title, sequence);
                buffer.clear();
            }

            if (!exhausted) {
                buffer.add(current);
            }
        }

        iter.close();
        iter = null;
        this.pw.flush();
        this.pw.close();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        // safety net for the error paths; already-released resources are tolerated
        CloserUtil.close(reference);
        CloserUtil.close(iter);
        CloserUtil.close(pw);
    }
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class KnimeVariantHelper method processVcfMulti.
/**
 * Processes a VCF file: every input variant is passed to {@code fun} and each
 * variant of the returned list (if any) is written to a new output VCF created
 * in the working directory.
 * <p>
 * A file named {@code STOP} in the working directory acts as an emergency stop:
 * if it exists on entry an {@link IOException} is thrown; it is re-checked at most
 * every 10 seconds while looping and, when found, the loop is aborted.
 * <p>
 * On failure (exception or STOP file) the output VCF and its index are deleted.
 * NOTE(review): when the STOP file aborts the loop, the method still returns the
 * output path even though that file is deleted in the {@code finally} block —
 * confirm callers expect this.
 *
 * @param vcfIn input file name or URL; must not be null
 * @param fun function mapping one variant to zero or more variants to write;
 *            a {@code null} result is treated as "write nothing"
 * @return the output file name
 * @throws IllegalArgumentException if {@code vcfIn} is null
 * @throws IOException if the STOP file exists on entry, or wrapping any exception
 *                     raised while reading, transforming or writing
 */
public String processVcfMulti(final String vcfIn, final Function<VariantContext, List<VariantContext>> fun) throws IOException {
    this.lastVariantCount = 0;
    if (vcfIn == null) {
        final String msg = "Vcf Input URI/FIle is null.";
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
    }
    File outVcfFile = null;
    File outVcfIndexFile = null;
    final File STOP_FILE = new File(this.workfingDirectory, "STOP");
    if (STOP_FILE.exists()) {
        final String msg = "There is a stop file in " + STOP_FILE;
        LOG.error(msg);
        throw new IOException(msg);
    }
    boolean fail_flag = false;
    VcfIterator iter = null;
    VariantContextWriter variantContextWriter = null;
    try {
        IOUtil.assertDirectoryIsReadable(this.workfingDirectory);
        IOUtil.assertDirectoryIsWritable(this.workfingDirectory);
        if (!IOUtil.isUrl(vcfIn)) {
            IOUtil.assertFileIsReadable(new File(vcfIn));
        }
        // output suffix: forced by configuration, otherwise mirrors the input compression
        final String extension;
        if (this.forceSuffix.equals(ForceSuffix.ForceTabix)) {
            extension = ".vcf.gz";
        } else if (this.forceSuffix.equals(ForceSuffix.ForceTribble)) {
            extension = ".vcf";
        } else if (vcfIn.endsWith(".gz")) {
            extension = ".vcf.gz";
        } else {
            extension = ".vcf";
        }
        final String filename = this.createOutputFile(vcfIn, extension);
        // block-compressed output is indexed with tabix, plain VCF with a tribble index;
        // the name is only used to clean up the index on failure
        final String indexFilename;
        if (extension.endsWith(".gz")) {
            indexFilename = filename + TabixUtils.STANDARD_INDEX_EXTENSION;
        } else {
            indexFilename = filename + Tribble.STANDARD_INDEX_EXTENSION;
        }
        outVcfFile = new File(filename);
        outVcfIndexFile = new File(indexFilename);
        LOG.info("opening " + vcfIn);
        iter = VCFUtils.createVcfIterator(vcfIn);
        super.init(iter.getHeader());
        final VCFHeader vcfHeader2;
        if (this.getExtraVcfHeaderLines().isEmpty()) {
            vcfHeader2 = iter.getHeader();
        } else {
            vcfHeader2 = new VCFHeader(iter.getHeader());
            for (final VCFHeaderLine extra : this.getExtraVcfHeaderLines()) {
                vcfHeader2.addMetaDataLine(extra);
            }
            // clear vcf header line now they 've been added to the header.
            this.getExtraVcfHeaderLines().clear();
        }
        final SAMSequenceDictionary dict = this.getHeader().getSequenceDictionary();
        if (dict == null) {
            final String msg = "There is no dictionary (##contig lines) in " + vcfIn + " but they are required.";
            LOG.error(msg);
            throw new IllegalArgumentException(msg);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        progress.setLogPrefix(this.filePrefix);
        LOG.info("writing " + outVcfFile + ". Emergency stop file is " + STOP_FILE);
        variantContextWriter = this.variantContextWriterBuilder.setOutputFile(outVcfFile).setReferenceDictionary(dict).build();
        long lastTick = System.currentTimeMillis();
        variantContextWriter.writeHeader(vcfHeader2);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final List<VariantContext> array = fun.apply(ctx);
            if (array != null) {
                for (final VariantContext ctx2 : array) {
                    variantContextWriter.add(ctx2);
                    this.lastVariantCount++;
                }
            }
            // check STOP File, at most once every 10 seconds
            final long now = System.currentTimeMillis();
            if ((now - lastTick) > 10 * 1000) {
                lastTick = now;
                if (STOP_FILE.exists()) {
                    LOG.warn("STOP FILE detected " + STOP_FILE + " Aborting.");
                    fail_flag = true;
                    break;
                }
            }
        }
        progress.finish();
        iter.close();
        iter = null;
        variantContextWriter.close();
        variantContextWriter = null;
        return outVcfFile.getPath();
    } catch (final Exception err) {
        fail_flag = true;
        LOG.error(err);
        throw new IOException(err);
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(variantContextWriter);
        // do not leave partial output behind after a failure
        if (fail_flag) {
            if (outVcfFile != null && outVcfFile.exists()) {
                LOG.warn("deleting " + outVcfFile);
                outVcfFile.delete();
            }
            if (outVcfIndexFile != null && outVcfIndexFile.exists()) {
                LOG.warn("deleting " + outVcfIndexFile);
                outVcfIndexFile.delete();
            }
        }
    }
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class Launcher method doVcfToVcf.
/**
 * Opens the input VCF iterator and the output writer, delegates the actual
 * conversion to {@code doVcfToVcf(String, VcfIterator, VariantContextWriter)}
 * and closes both ends.
 *
 * @param inputNameOrNull input name passed to {@code openVcfIterator}; when null
 *                        the label {@code "<STDIN>"} is handed to the delegate
 * @param outorNull output file passed to {@code openVariantContextWriter}
 * @return the delegate's return value, or {@code -1} if any exception is raised
 */
protected int doVcfToVcf(final String inputNameOrNull, final File outorNull) {
    VcfIterator vcfIn = null;
    VariantContextWriter vcfOut = null;
    try {
        vcfIn = openVcfIterator(inputNameOrNull);
        vcfOut = openVariantContextWriter(outorNull);

        final String inputLabel;
        if (inputNameOrNull != null) {
            inputLabel = inputNameOrNull;
        } else {
            inputLabel = "<STDIN>";
        }
        final int status = doVcfToVcf(inputLabel, vcfIn, vcfOut);

        // explicit close on the success path so close failures are reported as -1
        vcfOut.close();
        vcfOut = null;
        vcfIn.close();
        vcfIn = null;
        return status;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(vcfIn);
        CloserUtil.close(vcfOut);
    }
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class VcfBiomart method doVcfToVcf.
/**
 * Annotates each variant with the result of a Biomart query.
 * <p>
 * For every variant the contig/start/end filter columns are updated, the query
 * DOM is serialized and sent as the {@code query} parameter of a GET request to
 * {@code serviceUrl}. Each non-blank response line other than {@code [success]}
 * is split on tabs and turned into one pipe-separated value (optionally prefixed
 * with the attribute labels); the distinct values are stored under the INFO key
 * {@code TAG}, replacing any existing value for that key.
 * <p>
 * The header obtained from {@code iter} is modified in place before being written.
 *
 * @param inputName name of the input (not used by this implementation)
 * @param iter source of variants
 * @param out destination writer
 * @return {@code 0} on success
 * @throws RuntimeIOException wrapping any exception, including a non-200 HTTP status
 */
@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
    final Pattern tab = Pattern.compile("[\t]");
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        final VCFHeader header = iter.getHeader();
        StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.rmAttribute(this.TAG);
            // point the query filters at the current variant before serializing the DOM
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            final HttpGet httpGet = new HttpGet(builder.build());

            CloseableHttpResponse httpResponse = null;
            InputStream response = null;
            BufferedReader br = null;
            final Set<String> infoAtts;
            try {
                httpResponse = httpClient.execute(httpGet);
                int responseCode = httpResponse.getStatusLine().getStatusCode();
                if (responseCode != 200) {
                    throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
                }
                response = httpResponse.getEntity().getContent();
                if (this.teeResponse) {
                    response = new TeeInputStream(response, stderr(), false);
                }
                br = new BufferedReader(new InputStreamReader(response));
                // one INFO value per response line; LinkedHashSet keeps first-seen order and drops duplicates
                infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
                    final StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < this.attributes.size(); i++) {
                        if (i > 0)
                            sb.append("|");
                        if (this.printLabels)
                            sb.append(escapeInfo(this.attributes.get(i))).append("|");
                        // missing trailing columns become empty fields
                        sb.append(i < T.length ? escapeInfo(T[i]) : "");
                    }
                    return sb.toString();
                }).collect(Collectors.toCollection(LinkedHashSet::new));
            } finally {
                // release the response on every path (bad status, read error, success)
                CloserUtil.close(br);
                CloserUtil.close(response);
                CloserUtil.close(httpResponse);
            }
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
use of com.github.lindenb.jvarkit.util.vcf.VcfIterator in project jvarkit by lindenb.
the class VcfEnsemblReg method annotate.
/**
 * Copies the variants of {@code inf} to {@code outf}, adding an INFO attribute
 * named after {@code track.id} with the value found in the track's
 * BigWig/BigBed resource at the variant start position.
 * <p>
 * Contig names are prefixed with {@code chr} when missing; only chr0-9, chr10-22,
 * chrX and chrY are looked up, other contigs are copied unchanged. Variants with
 * no overlapping value are copied unchanged as well.
 * For BigWig tracks the first value returned is stored as a Float; for BigBed
 * tracks the distinct {@code name|color} pairs are joined with commas.
 *
 * @param track the track to query (its {@code id}, labels, {@code url} and {@code parent} are read)
 * @param inf input VCF file
 * @param outf output VCF file
 * @throws IOException on read/write/close failure
 */
private void annotate(Track track, File inf, File outf) throws IOException {
    final boolean contained = false;
    LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
    VcfIterator in = null;
    SeekableStream sstream = null;
    VariantContextWriter w1 = null;
    try {
        in = VCFUtils.createVcfIteratorFromFile(inf);
        VCFHeader header = in.getHeader();
        sstream = SeekableStreamFactory.getInstance().getStreamFor(track.url);
        BBFileReader bigFile = new BBFileReader(track.url.toString(), new SeekableStreamAdaptor(sstream));
        w1 = VCFUtils.createVariantContextWriter(outf);
        // BigWig carries numeric values, BigBed carries text
        VCFInfoHeaderLine info;
        if (bigFile.isBigWigFile()) {
            info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
        } else {
            info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
        }
        header.addMetaDataLine(info);
        w1.writeHeader(header);
        while (in.hasNext()) {
            VariantContext ctx = in.next();
            String chrom = ctx.getContig();
            if (!chrom.startsWith("chr"))
                chrom = "chr" + chrom;
            // chr2[0-2] so that chr20 is annotated too (it was skipped by chr2[12])
            if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[0-2])")) {
                w1.add(ctx);
            } else if (bigFile.isBigWigFile()) {
                BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
                // keep the first value only
                Float wigValue = null;
                while (iter != null && iter.hasNext() && wigValue == null) {
                    WigItem item = iter.next();
                    wigValue = item.getWigValue();
                }
                if (wigValue == null) {
                    w1.add(ctx);
                    continue;
                }
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.attribute(track.id, wigValue);
                w1.add(vcb.make());
            } else {
                BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
                Set<String> bedValues = new HashSet<String>();
                while (iter != null && iter.hasNext()) {
                    BedFeature item = iter.next();
                    String[] rest = item.getRestOfFields();
                    // features without exactly 6 extra fields are reported and skipped
                    if (rest == null || rest.length != 6) {
                        System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
                        continue;
                    }
                    // the decoder for the last field depends on the parent track
                    String color = null;
                    if (track.parent != null) {
                        if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
                            color = segway_17SegmentationSummaries(rest[5]);
                        } else if (track.parent.startsWith("ProjectedSegments")) {
                            color = projectedSegments(rest[5]);
                        } else if (track.parent.startsWith("RegBuildOverview")) {
                            color = regBuildOverview(rest[5]);
                        } else if (track.parent.startsWith("Segway_17CellSegments")) {
                            color = segway_17CellSegments(rest[5]);
                        } else {
                            System.err.println("Unknown parent:" + track.parent);
                        }
                    }
                    if (color == null)
                        continue;
                    bedValues.add(rest[0] + "|" + color);
                }
                if (bedValues.isEmpty()) {
                    w1.add(ctx);
                    continue;
                }
                StringBuilder sb = new StringBuilder();
                for (String s : bedValues) {
                    if (sb.length() != 0)
                        sb.append(",");
                    sb.append(s);
                }
                VariantContextBuilder vcb = new VariantContextBuilder(ctx);
                vcb.attribute(track.id, sb.toString());
                w1.add(vcb.make());
            }
        }
    } finally {
        // close everything that was opened, even if processing failed half-way
        try {
            if (sstream != null)
                sstream.close();
        } finally {
            try {
                if (in != null)
                    in.close();
            } finally {
                if (w1 != null)
                    w1.close();
            }
        }
    }
}
Aggregations