use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfDoest method run.
private void run(final LineIterator lr, final PrintWriter pw) throws IOException {
SortingCollection<TranscriptInfo> sorting = null;
CloseableIterator<TranscriptInfo> iter2 = null;
try {
while (lr.hasNext()) {
VcfIterator in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
final VCFHeader header = in.getHeader();
final Pedigree pedigree = Pedigree.newParser().parse(header);
if (pedigree.isEmpty()) {
throw new IOException("No pedigree found in header VCF header. use VcfInjectPedigree to add it");
}
final SortedSet<Pedigree.Person> individuals = new TreeSet<>();
for (final Pedigree.Person individual : pedigree.getPersons()) {
if (individual.isAffected() || individual.isUnaffected()) {
individuals.add(individual);
}
}
boolean first = true;
pw.println("# samples ( 0: unaffected 1:affected)");
pw.print("population <- data.frame(family=c(");
first = true;
for (final Pedigree.Person person : individuals) {
if (!first)
pw.print(",");
pw.print("\"" + person.getFamily().getId() + "\"");
first = false;
}
pw.print("),name=c(");
first = true;
for (final Pedigree.Person person : individuals) {
if (!first)
pw.print(",");
pw.print("\"" + person.getId() + "\"");
first = false;
}
pw.print("),status=c(");
first = true;
for (final Pedigree.Person person : individuals) {
if (!first)
pw.print(",");
pw.print(person.isUnaffected() ? 0 : 1);
first = false;
}
pw.println("))");
sorting = SortingCollection.newInstance(TranscriptInfo.class, new TranscriptInfoCodec(), new TranscriptInfoCmp(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorting.setDestructiveIteration(true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
/**
* loop over variants
*/
while (in.hasNext() && !pw.checkError()) {
final VariantContext ctx = progress.watch(in.next());
if (ctx.isFiltered())
continue;
if (ctx.getAlternateAlleles().isEmpty())
continue;
final Allele altAllele = ctx.getAltAlleleWithHighestAlleleCount();
final MafCalculator mafCalculator = new MafCalculator(altAllele, ctx.getContig());
boolean genotyped = false;
for (final Pedigree.Person p : pedigree.getPersons()) {
if (!(p.isAffected() || p.isUnaffected()))
continue;
final Genotype g = ctx.getGenotype(p.getId());
if (g == null)
throw new IOException("Strange I cannot find individual " + p + " in the pedigree. Aborting.");
if (g.isCalled()) {
mafCalculator.add(g, p.isMale());
}
if (g.isHet() || g.isHomVar()) {
if (!g.getAlleles().contains(altAllele))
continue;
genotyped = true;
break;
}
}
if (!genotyped)
continue;
final Interval interval = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd());
final List<KnownGene> genes = this.overlap(interval);
if (genes.isEmpty())
continue;
for (final KnownGene kg : genes) {
final TranscriptInfo trInfo = new TranscriptInfo();
trInfo.contig = kg.getContig();
trInfo.txStart = kg.getTxStart();
trInfo.txEnd = kg.getTxEnd();
trInfo.transcriptName = kg.getName();
trInfo.strand = (byte) (kg.isPositiveStrand() ? '+' : '-');
trInfo.exonCount = kg.getExonCount();
trInfo.transcriptLength = kg.getTranscriptLength();
trInfo.ctxStart = ctx.getStart();
trInfo.ref = ctx.getReference();
trInfo.alt = altAllele;
trInfo.maf = mafCalculator.getMaf();
trInfo.genotypes = new byte[individuals.size()];
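// Encode each individual's genotype relative to the selected ALT allele: 0=hom-ref, 1=het (ref/alt), 2=hom-alt, -9=missing or uninterpretable; genotypes carrying another ALT allele are folded into hom-ref below.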
int idx = 0;
for (final Pedigree.Person individual : individuals) {
final Genotype genotype = ctx.getGenotype(individual.getId());
final byte b;
if (genotype.isHomRef()) {
b = 0;
} else if (genotype.isHomVar() && genotype.getAlleles().contains(altAllele)) {
b = 2;
} else if (genotype.isHet() && genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
b = 1;
} else /* we treat 0/2 as hom-ref */
if (genotype.isHet() && !genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
b = 0;
} else /* we treat 2/2 as hom-ref */
if (genotype.isHomVar() && !genotype.getAlleles().contains(altAllele)) {
LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
b = 0;
} else {
b = -9;
}
trInfo.genotypes[idx] = b;
++idx;
}
KnownGene archetype = kg;
/* find gene archetype = longest overlapping */
for (final KnownGene kg2 : genes) {
if (kg2 == kg)
continue;
if (archetype.getStrand().equals(kg2.getStrand()) && archetype.getTranscriptLength() < kg2.getTranscriptLength()) {
archetype = kg2;
}
}
trInfo.archetypeName = archetype.getName();
trInfo.archetypeLength = archetype.getTranscriptLength();
boolean ctxWasFoundInExon = false;
final int ctxPos0 = ctx.getStart() - 1;
int indexInTranscript0 = 0;
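// Walk the exons to locate the variant and convert its genomic position into a 0-based offset within the spliced transcript (reversed for '-' strand transcripts).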
for (final KnownGene.Exon exon : kg.getExons()) {
// variant in exon ?
if (!(exon.getStart() > (ctx.getEnd() - 1) || (ctx.getStart() - 1) >= exon.getEnd())) {
ctxWasFoundInExon = true;
indexInTranscript0 += (ctxPos0 - exon.getStart());
if (kg.isNegativeStrand()) {
indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
}
trInfo.indexInTranscript0 = indexInTranscript0;
trInfo.overlapName = exon.getName();
sorting.add(trInfo);
break;
} else {
indexInTranscript0 += (exon.getEnd() - exon.getStart());
}
}
if (ctxWasFoundInExon) {
continue;
}
indexInTranscript0 = 0;
// search closest intron/exon junction
for (int ex = 0; ex + 1 < kg.getExonCount(); ++ex) {
final KnownGene.Exon exon1 = kg.getExon(ex);
indexInTranscript0 += (exon1.getEnd() - exon1.getStart());
final KnownGene.Exon exon2 = kg.getExon(ex + 1);
if (exon1.getEnd() <= ctxPos0 && ctxPos0 < exon2.getStart()) {
final int dist_to_exon1 = ctxPos0 - exon1.getEnd();
final int dist_to_exon2 = exon2.getStart() - ctxPos0;
if (dist_to_exon2 < dist_to_exon1) {
indexInTranscript0++;
}
if (kg.isNegativeStrand()) {
indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
}
trInfo.indexInTranscript0 = indexInTranscript0;
trInfo.overlapName = exon1.getNextIntron().getName();
sorting.add(trInfo);
break;
}
}
}
// end loop over genes
}
// end while loop over variants
progress.finish();
sorting.doneAdding();
LOG.info("done adding");
iter2 = sorting.iterator();
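// Group the sorted records by (contig, transcript name) so each transcript is emitted as a single R block below.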
final EqualRangeIterator<TranscriptInfo> eqiter = new EqualRangeIterator<TranscriptInfo>(iter2, new Comparator<TranscriptInfo>() {
@Override
public int compare(final TranscriptInfo o1, final TranscriptInfo o2) {
int i = o1.contig.compareTo(o2.contig);
if (i != 0)
return i;
i = o1.transcriptName.compareTo(o2.transcriptName);
return i;
}
});
while (eqiter.hasNext()) {
final List<TranscriptInfo> list = eqiter.next();
final TranscriptInfo front = list.get(0);
pw.println("# BEGIN TRANSCRIPT " + front.transcriptName + " ##########################################");
pw.println("transcript.chrom <- \"" + front.contig + "\"");
pw.println("transcript.txStart0 <- " + front.txStart + "");
pw.println("transcript.txEnd0 <- " + front.txEnd + "");
pw.println("transcript.name <- \"" + front.transcriptName + "\"");
pw.println("transcript.strand <- \"" + ((char) front.strand) + "\"");
pw.println("transcript.length <- " + front.transcriptLength + "");
pw.println("transcript.exonCount <- " + front.exonCount + "");
pw.println("archetype.name <- \"" + front.archetypeName + "\"");
pw.println("archetype.length <- " + front.archetypeLength + "");
pw.print("variants <- data.frame(chrom=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print("\"" + v.contig + "\"");
first = false;
}
pw.print("),chromStart=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print(v.ctxStart);
first = false;
}
pw.print("),chromEnd=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print(v.ctxStart + v.ref.length() - 1);
first = false;
}
pw.print("),refAllele=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print("\"" + v.ref.getDisplayString() + "\"");
first = false;
}
pw.print("),altAllele=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print("\"" + v.alt.getDisplayString() + "\"");
first = false;
}
pw.print("),positionInTranscript1=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print(v.indexInTranscript0 + 1);
first = false;
}
pw.print("),maf=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print(v.maf);
first = false;
}
pw.print("),overlapName=c(");
first = true;
for (final TranscriptInfo v : list) {
if (!first)
pw.print(",");
pw.print("\"" + v.overlapName + "\"");
first = false;
}
pw.println("))");
pw.println("# genotypes as a list. Should be a multiple of length(samples).");
pw.println("# 0 is homref (0/0), 1 is het (0/1), 2 is homvar (1/1)");
pw.println("# if the variant contains another ALT allele: (0/2) and (2/2) are considered 0 (homref)");
pw.print("genotypes <- c(");
first = true;
for (final TranscriptInfo tr : list) {
for (byte g : tr.genotypes) {
if (!first)
pw.print(",");
first = false;
pw.print((int) g);
}
}
pw.println(")");
pw.println("stopifnot(NROW(variants) * NROW(population) == length(genotypes) )");
if (this.userDefinedFunName == null || this.userDefinedFunName.trim().isEmpty()) {
pw.println("## WARNING not user-defined R function was defined");
} else {
pw.println("# consumme data with user-defined R function ");
pw.println(this.userDefinedFunName + "()");
}
pw.println("# END TRANSCRIPT " + front.transcriptName + " ##########################################");
}
// end while eqiter
eqiter.close();
iter2.close();
iter2 = null;
sorting.cleanup();
sorting = null;
}
} finally {
CloserUtil.close(iter2);
if (sorting != null)
sorting.cleanup();
}
}
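The run() method above is private and is normally reached through the tool's command-line entry point. Purely as a minimal sketch of how a compatible LineIterator/PrintWriter pair can be assembled from standard htsjdk classes, the hypothetical driver below wires stdin and stdout together; the class name, the header-echo loop and the commented-out call are illustrative assumptions, not jvarkit code.

import java.io.PrintWriter;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.LineIteratorImpl;
import htsjdk.tribble.readers.SynchronousLineReader;

public class LineIteratorDriverSketch {
    public static void main(final String[] args) throws Exception {
        // Same wiring the caller of run(...) needs: a pull-based line iterator over a VCF stream
        // and a PrintWriter that will receive the generated R script.
        final LineIterator lr = new LineIteratorImpl(new SynchronousLineReader(System.in));
        try (PrintWriter pw = new PrintWriter(System.out)) {
            // Here we only echo the VCF header lines to show the iterator in use; the real method
            // instead hands 'lr' to VCFUtils.createVcfIteratorFromLineIterator(lr, true).
            while (lr.hasNext() && lr.peek().startsWith("#")) {
                pw.println(lr.next());
            }
        }
    }
}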
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class VcfAnnotWithBeacon method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, final VcfIterator iter, final VariantContextWriter out) {
CloseableHttpClient httpClient = null;
InputStream contentInputStream = null;
try {
final org.apache.http.impl.client.HttpClientBuilder hb = HttpClients.custom();
if (this.ignoreCertErrors) {
// http://stackoverflow.com/questions/24720013/apache-http-client-ssl-certificate-error
System.setProperty("jsse.enableSNIExtension", "false");
final SSLContext sslContext = org.apache.http.conn.ssl.SSLContexts.custom().loadTrustMaterial(null, new org.apache.http.conn.ssl.TrustStrategy() {
@Override
public boolean isTrusted(final X509Certificate[] chain, final String authType) throws CertificateException {
return true;
}
}).useTLS().build();
final org.apache.http.conn.ssl.SSLConnectionSocketFactory connectionFactory = new org.apache.http.conn.ssl.SSLConnectionSocketFactory(sslContext, new org.apache.http.conn.ssl.AllowAllHostnameVerifier());
hb.setSSLSocketFactory(connectionFactory);
}
httpClient = hb.build();
HttpGet httpGetRequest = null;
final Set<String> available_chromosomes = new HashSet<>();
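// Ask the Beacon service once for the chromosome names it knows, so variants on unknown contigs can be passed through without a network round-trip.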
try {
httpGetRequest = new HttpGet(baseurl + "/chromosomes");
httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
JsonParser jsonparser = new JsonParser();
final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
while (jsr.hasNext()) {
final String ctg = jsr.next().getAsString();
available_chromosomes.add(ctg);
}
LOG.debug(available_chromosomes);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(contentInputStream);
}
final Set<String> available_alleles = new HashSet<>();
try {
httpGetRequest = new HttpGet(baseurl + "/alleles");
httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
JsonParser jsonparser = new JsonParser();
final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
while (jsr.hasNext()) {
final String allele = jsr.next().getAsString();
available_alleles.add(allele);
}
LOG.debug(available_alleles);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(contentInputStream);
}
final StoredResponseBinding storedResponseBinding = new StoredResponseBinding();
final VCFHeader header = new VCFHeader(iter.getHeader());
final VCFInfoHeaderLine infoHeaderLine = new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Tag inserted with " + getProgramName());
header.addMetaDataLine(infoHeaderLine);
DatabaseEntry key = new DatabaseEntry();
DatabaseEntry data = new DatabaseEntry();
out.writeHeader(header);
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (!ctx.isVariant() || ctx.getReference().isSymbolic()) {
out.add(ctx);
continue;
}
if (ctx.hasAttribute(infoHeaderLine.getID()) && this.dontUpdateIfInfoIsPresent) {
out.add(ctx);
continue;
}
String beaconContig = ctx.getContig();
if (!available_chromosomes.contains(beaconContig)) {
if (beaconContig.startsWith("chr")) {
beaconContig = beaconContig.substring(3);
}
if (!available_chromosomes.contains(beaconContig)) {
out.add(ctx);
continue;
}
}
final List<Allele> altAlleles = ctx.getAlternateAlleles();
if (altAlleles.isEmpty()) {
out.add(ctx);
continue;
}
final Set<String> newInfo = new HashSet<>();
for (final Allele alt : altAlleles) {
if (alt.isSymbolic() || alt.isNoCall())
continue;
final StringBuilder buildUrl = new StringBuilder();
buildUrl.append("chrom=");
buildUrl.append(URLEncoder.encode(beaconContig, "UTF-8"));
buildUrl.append("&pos=");
/* "Coordinate within a chromosome. Position is a number and is 0-based". */
buildUrl.append(ctx.getStart() - 1);
buildUrl.append("&allele=");
final String allele;
if (ctx.getReference().length() > alt.length()) {
// del
allele = "D";
} else if (ctx.getReference().length() < alt.length()) {
// ins
allele = "I";
} else {
allele = alt.getDisplayString();
}
if (!available_alleles.contains(allele))
continue;
buildUrl.append(allele);
buildUrl.append("&ref=");
buildUrl.append(URLEncoder.encode(this.genomeBuild, "UTF-8"));
final String queryUrl = buildUrl.toString();
boolean foundInBdb = false;
Set<String> foundIn = null;
if (this.beaconDatabase != null) {
StringBinding.stringToEntry(queryUrl, key);
if (this.beaconDatabase.get(this.txn, key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
StoredResponse response = storedResponseBinding.entryToObject(data);
// TODO: check how old that cached data is
if (response.timeStamp < 0) {
response = null;
this.beaconDatabase.delete(this.txn, key);
}
if (response != null) {
foundInBdb = true;
foundIn = response.foundIn;
}
}
}
if (foundIn == null) {
foundIn = new HashSet<>();
try {
httpGetRequest = new HttpGet(baseurl + "/responses?" + queryUrl);
httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
LOG.debug(httpGetRequest.getURI());
contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
JsonParser jsonparser = new JsonParser();
final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
while (jsr.hasNext()) {
final JsonObject b = jsr.next().getAsJsonObject();
if (!(b.has("beacon") && b.has("response")))
continue;
final String beacon_id = b.get("beacon").getAsJsonObject().get("id").getAsString();
final JsonElement response_prim = b.get("response");
if (response_prim.isJsonPrimitive() && response_prim.getAsBoolean()) {
foundIn.add(beacon_id);
}
}
} catch (final Exception err) {
LOG.error(err);
if (stopOnNetworkError) {
throw new RuntimeIOException(err);
}
} finally {
CloserUtil.close(contentInputStream);
}
}
if (this.beaconDatabase != null && !foundInBdb) {
StoredResponse response = new StoredResponse();
response.timeStamp = System.currentTimeMillis();
response.foundIn = foundIn;
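// Note: this snippet only builds the cache entry; persisting it would presumably use storedResponseBinding.objectToEntry(response, data) and beaconDatabase.put(this.txn, key, data), which is not shown here.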
}
// example query: chrom=17&pos=41244981&allele=G&ref=GRCh37
newInfo.addAll(foundIn.stream().map(S -> alt.getDisplayString() + "|" + S).collect(Collectors.toSet()));
}
if (newInfo.isEmpty()) {
out.add(ctx);
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
vcb.attribute(infoHeaderLine.getID(), new ArrayList<String>(newInfo));
out.add(vcb.make());
}
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(httpClient);
}
}
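For context, the sketch below shows, under the assumption that the service exposes the Beacon Network style /responses endpoint already used above, what a single stand-alone query for one variant could look like; the base URL, the class name and the chrom/pos/allele/ref values (borrowed from the example comment in the code) are illustrative assumptions rather than confirmed project settings.

import java.io.InputStreamReader;
import java.io.Reader;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class BeaconQuerySketch {
    public static void main(final String[] args) throws Exception {
        // Assumed base URL; the tool above receives it through its 'baseurl' parameter.
        final String baseurl = "https://beacon-network.org/api";
        // Values borrowed from the example comment in the snippet; the real code sends ctx.getStart()-1 (0-based).
        final String query = "chrom=17&pos=41244981&allele=G&ref=GRCh37";
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            final HttpGet get = new HttpGet(baseurl + "/responses?" + query);
            get.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
            try (Reader r = new InputStreamReader(client.execute(get).getEntity().getContent())) {
                // Same JSON shape the loop above expects: an array of {beacon:{id:...}, response:boolean} objects.
                for (final JsonElement e : new JsonParser().parse(r).getAsJsonArray()) {
                    final JsonObject b = e.getAsJsonObject();
                    if (b.has("beacon") && b.has("response") && b.get("response").isJsonPrimitive() && b.get("response").getAsBoolean()) {
                        System.out.println("found in beacon " + b.get("beacon").getAsJsonObject().get("id").getAsString());
                    }
                }
            }
        }
    }
}

In the tool itself the same query string doubles as the Berkeley DB key, so variants that were already looked up can be answered from the local cache instead of the network.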
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class EigenVariants method initialize.
@Override
public void initialize() {
logger.info("opening eigen directory :" + eigenDir);
IOUtil.assertDirectoryIsReadable(eigenDir);
this.annotator = new EigenInfoAnnotator(this.eigenDir);
if (this.tabixPrefix != null) {
logger.info("changing eigen file prefix from :" + this.annotator.getTabixPrefix() + " to " + this.tabixPrefix);
this.annotator.setTabixFilePrefix(this.tabixPrefix);
}
final VCFHeader header1 = super.getVcfHeader();
final VCFHeader h2 = new VCFHeader(header1);
for (final VCFInfoHeaderLine vihl : this.annotator.getInfoHeaderLines()) {
if (h2.getInfoHeaderLine(vihl.getID()) != null) {
throw new UserException.MalformedVCFHeader("VCF INFO " + vihl.getID() + " already defined in input VCF.");
}
h2.addMetaDataLine(vihl);
}
super.writer.writeHeader(h2);
super.initialize();
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class SoftClipAnnotator method initialize.
public void initialize() {
final Set<VCFHeaderLine> hInfo = new HashSet<>();
final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
for (final String sample : samples) {
this.sample2bam.put(sample, new ArrayList<>());
}
for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
if (isUniqueHeaderLine(line, hInfo))
hInfo.add(line);
}
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
VCFHeader header2 = new VCFHeader(vcfHeader);
header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
header2.addMetaDataLine(this.filterHaveClipInVariant);
vcfWriter.writeHeader(header2);
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
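// Open each BAM leniently and register it only under read-group samples that also occur in the VCF; files without any shared sample are closed immediately.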
for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
logger.info("opening " + samFilename);
final SamReader r = srf.open(samFilename);
final Set<String> sampleset = new HashSet<>();
for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
continue;
sampleset.add(g.getSample());
}
if (sampleset.isEmpty()) {
logger.info("no VCF sample in " + samFilename);
CloserUtil.close(r);
continue;
}
this.samReaders.add(r);
for (final String sample : sampleset) {
if (!this.sample2bam.containsKey(sample))
continue;
this.sample2bam.get(sample).add(r);
}
}
}
use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.
the class WindowVariants method initialize.
@Override
public void initialize() {
if (this.window_size <= 0) {
throw new UserException("Bad window size.");
}
if (this.window_shift <= 0) {
throw new UserException("Bad window shift.");
}
if (this.winName == null || this.winName.isEmpty()) {
throw new UserException("Bad INFO ID windowName");
}
final Map<String, String> exprMap = new HashMap<>();
for (final String expStr : this.selectExpressions) {
exprMap.put("expr" + (1 + exprMap.size()), expStr);
}
this.jexls = VariantContextUtils.initializeMatchExps(exprMap);
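// Each select expression is compiled into a named JEXL matcher (expr1, expr2, ...) that is later evaluated against the variants falling in each window.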
final VCFHeader header = new VCFHeader(super.getVcfHeader());
if (header.getInfoHeaderLine(this.winName) != null) {
throw new UserException("VCF header already contains the INFO header ID=" + this.winName);
}
header.addMetaDataLine(new VCFInfoHeaderLine(this.winName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Window : start|end|number-of-matching-variants|number-of-non-matching-variants"));
super.writer.writeHeader(header);
super.initialize();
}