use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class HaloplexParasite method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter out) {
SamReader samReader = null;
final List<Mutation> mutations = new ArrayList<>();
try {
final Set<File> bamFiles = Files.lines(this.bamList.toPath()).filter(T -> !(T.isEmpty() || T.startsWith("#"))).map(T -> new File(T)).collect(Collectors.toSet());
final VCFHeader header = new VCFHeader();
header.setSequenceDictionary(in.getHeader().getSequenceDictionary());
final VCFFilterHeaderLine filter = new VCFFilterHeaderLine("HALOPLEXPARASITE", "fails Parasite Haloplex Sequence");
final VCFInfoHeaderLine infoWord = new VCFInfoHeaderLine(filter.getID(), 1, VCFHeaderLineType.String, "Parasite Haloplex Sequence (Word|Count|Fraction)");
super.addMetaData(header);
out.writeHeader(header);
header.addMetaDataLine(filter);
header.addMetaDataLine(infoWord);
while (in.hasNext()) {
final VariantContext ctx = in.next();
final VariantContextBuilder vcb = new VariantContextBuilder(inputName, ctx.getContig(), ctx.getStart(), ctx.getEnd(), ctx.getAlleles());
if (!(ctx.isIndel() || ctx.isMixed())) {
out.add(vcb.make());
continue;
}
if (!vcb.getAlleles().stream().filter(A -> !(A.isSymbolic() || A.isNoCall() || A.length() < this.minClipSize)).findAny().isPresent()) {
out.add(vcb.make());
continue;
}
final Mutation mut = new Mutation(ctx);
mutations.add(mut);
}
final Counter<String> words = new Counter<>();
for (final File bamFile : bamFiles) {
LOG.info("Opening " + bamFile);
samReader = createSamReaderFactory().open(bamFile);
for (final Mutation mut : mutations) {
// words seen in that mutation : don't use a Counter
final Set<String> mutWords = new HashSet<>();
/* loop over reads overlapping that mutation */
final SAMRecordIterator sri = samReader.queryOverlapping(mut.contig, mut.start, mut.end);
while (sri.hasNext()) {
final SAMRecord rec = sri.next();
if (rec.getReadUnmappedFlag())
continue;
if (rec.isSecondaryOrSupplementary())
continue;
if (rec.getDuplicateReadFlag())
continue;
if (rec.getReadFailsVendorQualityCheckFlag())
continue;
final Cigar cigar = rec.getCigar();
if (cigar.numCigarElements() == 1)
continue;
final byte[] bases = rec.getReadBases();
int refpos = rec.getUnclippedStart();
int readpos = 0;
/* scan cigar overlapping that mutation */
for (final CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
final int ref_end = refpos + (op.consumesReferenceBases() || op.isClipping() ? ce.getLength() : 0);
final int read_end = readpos + (op.consumesReadBases() ? ce.getLength() : 0);
/* check clip is large enough */
if (op.equals(CigarOperator.S) && ce.getLength() >= this.minClipSize) {
/* check clip overlap mutation */
if (!(ref_end < mut.start || refpos > mut.end)) {
/* break read of soft clip into words */
for (int x = 0; x + this.minClipSize <= ce.getLength(); ++x) {
final String substr = new String(bases, readpos + x, this.minClipSize);
if (!substr.contains("N")) {
final String revcomp = AcidNucleics.reverseComplement(substr);
mutWords.add(substr);
if (!revcomp.equals(substr))
mutWords.add(revcomp);
}
}
}
}
refpos = ref_end;
readpos = read_end;
}
}
sri.close();
for (final String w : mutWords) {
words.incr(w);
}
}
// end of for(mutations)
samReader.close();
samReader = null;
}
LOG.info("mutations:" + mutations.size());
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
for (final Mutation mut : mutations) {
progress.watch(mut.contig, mut.start);
final VariantContextBuilder vcb = new VariantContextBuilder(inputName, mut.contig, mut.start, mut.end, mut.alleles);
String worstString = null;
Double worstFraction = null;
final double totalWords = words.getTotal();
for (final Allele a : mut.alleles) {
if (a.isSymbolic() || a.isNoCall() || a.length() < this.minClipSize)
continue;
for (int x = 0; x + this.minClipSize <= a.length(); ++x) {
final String substr = new String(a.getBases(), x, this.minClipSize);
final long count = words.count(substr);
final double fraction = count / totalWords;
if (worstFraction == null || worstFraction < fraction) {
worstString = substr + "|" + count + "|" + fraction;
worstFraction = fraction;
}
}
}
if (worstString != null) {
vcb.attribute(infoWord.getID(), worstString);
}
if (worstFraction != null && worstFraction.doubleValue() >= this.tresholdFraction) {
vcb.filter(filter.getID());
}
out.add(vcb.make());
}
progress.finish();
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(samReader);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class Biostar251649 method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VcfIterator in, VariantContextWriter w) {
try {
final VCFHeader header = new VCFHeader(in.getHeader());
VCFInfoHeaderLine info5 = new VCFInfoHeaderLine(leftTag + extend, 1, VCFHeaderLineType.String, "Sequence on the 5' of mutation");
VCFInfoHeaderLine info3 = new VCFInfoHeaderLine(rightTag + extend, 1, VCFHeaderLineType.String, "Sequence on the 3' of mutation");
if (header.getInfoHeaderLine(info5.getID()) != null) {
LOG.error("tag " + info5.getID() + " already present in VCF header");
return -1;
}
if (header.getInfoHeaderLine(info3.getID()) != null) {
LOG.error("tag " + info3.getID() + " already present in VCF header");
return -1;
}
header.addMetaDataLine(info5);
header.addMetaDataLine(info3);
GenomicSequence chrom = null;
w.writeHeader(header);
while (in.hasNext()) {
final VariantContext ctx = in.next();
if (chrom == null || !chrom.getChrom().equals(ctx.getContig())) {
chrom = new GenomicSequence(this.faidx, ctx.getContig());
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
if (ctx.getStart() > 0) {
final StringBuilder sb = new StringBuilder(this.extend);
for (int i = 0; i < this.extend; ++i) {
final int pos0 = (ctx.getStart() - 2) - i;
if (pos0 <= 0)
continue;
sb.insert(0, chrom.charAt(pos0));
}
if (sb.length() > 0)
vcb.attribute(info5.getID(), sb.toString());
}
{
final StringBuilder sb = new StringBuilder(this.extend);
for (int i = 0; i < this.extend; ++i) {
int pos0 = ctx.getEnd() + i;
if (pos0 >= chrom.length())
break;
sb.append(chrom.charAt(pos0));
}
if (sb.length() > 0)
vcb.attribute(info3.getID(), sb.toString());
}
w.add(vcb.make());
}
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(faidx);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class VcfAnnotWithBeacon method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, final VcfIterator iter, final VariantContextWriter out) {
CloseableHttpClient httpClient = null;
InputStream contentInputStream = null;
try {
final org.apache.http.impl.client.HttpClientBuilder hb = HttpClients.custom();
if (this.ignoreCertErrors) {
// http://stackoverflow.com/questions/24720013/apache-http-client-ssl-certificate-error
System.setProperty("jsse.enableSNIExtension", "false");
final SSLContext sslContext = org.apache.http.conn.ssl.SSLContexts.custom().loadTrustMaterial(null, new org.apache.http.conn.ssl.TrustStrategy() {
@Override
public boolean isTrusted(final X509Certificate[] chain, final String authType) throws CertificateException {
return true;
}
}).useTLS().build();
final org.apache.http.conn.ssl.SSLConnectionSocketFactory connectionFactory = new org.apache.http.conn.ssl.SSLConnectionSocketFactory(sslContext, new org.apache.http.conn.ssl.AllowAllHostnameVerifier());
hb.setSSLSocketFactory(connectionFactory);
}
httpClient = hb.build();
HttpGet httpGetRequest = null;
final Set<String> available_chromosomes = new HashSet<>();
try {
httpGetRequest = new HttpGet(baseurl + "/chromosomes");
httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
JsonParser jsonparser = new JsonParser();
final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
while (jsr.hasNext()) {
final String ctg = jsr.next().getAsString();
available_chromosomes.add(ctg);
}
LOG.debug(available_chromosomes);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(contentInputStream);
}
final Set<String> available_alleles = new HashSet<>();
try {
httpGetRequest = new HttpGet(baseurl + "/alleles");
httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
JsonParser jsonparser = new JsonParser();
final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
while (jsr.hasNext()) {
final String allele = jsr.next().getAsString();
available_alleles.add(allele);
}
LOG.debug(available_alleles);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(contentInputStream);
}
final StoredResponseBinding storedResponseBinding = new StoredResponseBinding();
final VCFHeader header = new VCFHeader(iter.getHeader());
final VCFInfoHeaderLine infoHeaderLine = new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Tag inserted with " + getProgramName());
header.addMetaDataLine(infoHeaderLine);
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
out.writeHeader(header);
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (!ctx.isVariant() || ctx.getReference().isSymbolic()) {
out.add(ctx);
continue;
}
if (ctx.hasAttribute(infoHeaderLine.getID()) && this.dontUpdateIfInfoIsPresent) {
out.add(ctx);
continue;
}
String beaconContig = ctx.getContig();
if (!available_chromosomes.contains(beaconContig)) {
if (beaconContig.startsWith("chr")) {
beaconContig = beaconContig.substring(3);
}
if (!available_chromosomes.contains(beaconContig)) {
out.add(ctx);
continue;
}
}
final List<Allele> altAlleles = ctx.getAlternateAlleles();
if (altAlleles.isEmpty()) {
out.add(ctx);
continue;
}
final Set<String> newInfo = new HashSet<>();
for (final Allele alt : altAlleles) {
if (alt.isSymbolic() || alt.isNoCall())
continue;
final StringBuilder buildUrl = new StringBuilder();
buildUrl.append("chrom=");
buildUrl.append(URLEncoder.encode(beaconContig, "UTF-8"));
buildUrl.append("&pos=");
/*
* "Coordinate within a chromosome. Position is a number and is 0-based"
* .
*/
buildUrl.append(ctx.getStart() - 1);
buildUrl.append("&allele=");
final String allele;
if (ctx.getReference().length() > alt.length()) {
// del
allele = "D";
} else if (ctx.getReference().length() > alt.length()) {
// ins
allele = "I";
} else {
allele = alt.getDisplayString();
}
if (!available_alleles.contains(allele))
continue;
buildUrl.append(allele);
buildUrl.append("&ref=");
buildUrl.append(URLEncoder.encode(this.genomeBuild, "UTF-8"));
final String queryUrl = buildUrl.toString();
boolean foundInBdb = false;
Set<String> foundIn = null;
if (this.beaconDatabase != null) {
StringBinding.stringToEntry(queryUrl, key);
if (this.beaconDatabase.get(this.txn, key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
StoredResponse response = storedResponseBinding.entryToObject(data);
if (// TODO check how old is
response.timeStamp < 0) // that data
{
response = null;
this.beaconDatabase.delete(this.txn, key);
}
if (response != null) {
foundInBdb = true;
foundIn = response.foundIn;
}
}
}
if (foundIn == null) {
foundIn = new HashSet<>();
try {
httpGetRequest = new HttpGet(baseurl + "/responses?" + queryUrl);
httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
LOG.debug(httpGetRequest.getURI());
contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
JsonParser jsonparser = new JsonParser();
final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
while (jsr.hasNext()) {
final JsonObject b = jsr.next().getAsJsonObject();
if (!(b.has("beacon") && b.has("response")))
continue;
final String beacon_id = b.get("beacon").getAsJsonObject().get("id").getAsString();
final JsonElement response_prim = b.get("response");
if (response_prim.isJsonPrimitive() && response_prim.getAsBoolean()) {
foundIn.add(beacon_id);
}
}
} catch (final Exception err) {
LOG.error(err);
if (stopOnNetworkError) {
throw new RuntimeIOException(err);
}
} finally {
CloserUtil.close(contentInputStream);
}
}
if (this.beaconDatabase != null && !foundInBdb) {
StoredResponse response = new StoredResponse();
response.timeStamp = System.currentTimeMillis();
response.foundIn = foundIn;
}
// 17&pos=41244981&=G&ref=GRCh37")
newInfo.addAll(foundIn.stream().map(S -> alt.getDisplayString() + "|" + S).collect(Collectors.toSet()));
}
if (newInfo.isEmpty()) {
out.add(ctx);
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(infoHeaderLine.getID(), new ArrayList<String>(newInfo));
out.add(vcb.make());
}
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(httpClient);
}
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class EigenVariants method initialize.
@Override
public void initialize() {
logger.info("opening eigen directory :" + eigenDir);
IOUtil.assertDirectoryIsReadable(eigenDir);
this.annotator = new EigenInfoAnnotator(this.eigenDir);
if (this.tabixPrefix != null) {
logger.info("changing eigen file prefix from :" + this.annotator.getTabixPrefix() + " to " + this.tabixPrefix);
this.annotator.setTabixFilePrefix(this.tabixPrefix);
}
final VCFHeader header1 = super.getVcfHeader();
final VCFHeader h2 = new VCFHeader(header1);
for (final VCFInfoHeaderLine vihl : this.annotator.getInfoHeaderLines()) {
if (h2.getInfoHeaderLine(vihl.getID()) != null) {
throw new UserException.MalformedVCFHeader("VCF INFO " + vihl.getID() + " already defined in input VCF.");
}
h2.addMetaDataLine(vihl);
}
super.writer.writeHeader(h2);
super.initialize();
}
use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.
the class WindowVariants method initialize.
@Override
public void initialize() {
if (this.window_size <= 0) {
throw new UserException("Bad window size.");
}
if (this.window_shift <= 0) {
throw new UserException("Bad window shift.");
}
if (this.winName == null || this.winName.isEmpty()) {
throw new UserException("Bad INFO ID windowName");
}
final Map<String, String> exprMap = new HashMap<>();
for (final String expStr : this.selectExpressions) {
exprMap.put("expr" + (1 + exprMap.size()), expStr);
}
this.jexls = VariantContextUtils.initializeMatchExps(exprMap);
final VCFHeader header = new VCFHeader(super.getVcfHeader());
if (header.getInfoHeaderLine(this.winName) != null) {
throw new UserException("VCF header already contains the INFO header ID=" + this.winName);
}
header.addMetaDataLine(new VCFInfoHeaderLine(this.winName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Window : start|end|number-of-matching-variants|number-of-non-matching-variants"));
super.writer.writeHeader(header);
super.initialize();
}
Aggregations