Search in sources :

Example 41 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfDoest method run.

private void run(final LineIterator lr, final PrintWriter pw) throws IOException {
    SortingCollection<TranscriptInfo> sorting = null;
    CloseableIterator<TranscriptInfo> iter2 = null;
    try {
        while (lr.hasNext()) {
            VcfIterator in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
            final VCFHeader header = in.getHeader();
            final Pedigree pedigree = Pedigree.newParser().parse(header);
            if (pedigree.isEmpty()) {
                throw new IOException("No pedigree found in header VCF header. use VcfInjectPedigree to add it");
            }
            final SortedSet<Pedigree.Person> individuals = new TreeSet<>();
            for (final Pedigree.Person individual : pedigree.getPersons()) {
                if (individual.isAffected() || individual.isUnaffected()) {
                    individuals.add(individual);
                }
            }
            boolean first = true;
            pw.println("# samples ( 0: unaffected 1:affected)");
            pw.print("population <- data.frame(family=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getFamily().getId() + "\"");
                first = false;
            }
            pw.print("),name=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print("\"" + person.getId() + "\"");
                first = false;
            }
            pw.print("),status=c(");
            first = true;
            for (final Pedigree.Person person : individuals) {
                if (!first)
                    pw.print(",");
                pw.print(person.isUnaffected() ? 0 : 1);
                first = false;
            }
            pw.println("))");
            sorting = SortingCollection.newInstance(TranscriptInfo.class, new TranscriptInfoCodec(), new TranscriptInfoCmp(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            sorting.setDestructiveIteration(true);
            final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
            /**
             * loop over variants
             */
            while (in.hasNext() && !pw.checkError()) {
                final VariantContext ctx = progess.watch(in.next());
                if (ctx.isFiltered())
                    continue;
                if (ctx.getAlternateAlleles().isEmpty())
                    continue;
                final Allele altAllele = ctx.getAltAlleleWithHighestAlleleCount();
                final MafCalculator mafCalculator = new MafCalculator(altAllele, ctx.getContig());
                boolean genotyped = false;
                for (final Pedigree.Person p : pedigree.getPersons()) {
                    if (!(p.isAffected() || p.isUnaffected()))
                        continue;
                    final Genotype g = ctx.getGenotype(p.getId());
                    if (g == null)
                        throw new IOException("Strange I cannot find individual " + p + " in the pedigree. Aborting.");
                    if (g.isCalled()) {
                        mafCalculator.add(g, p.isMale());
                    }
                    if (g.isHet() || g.isHomVar()) {
                        if (!g.getAlleles().contains(altAllele))
                            continue;
                        genotyped = true;
                        break;
                    }
                }
                if (!genotyped)
                    continue;
                final Interval interval = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd());
                final List<KnownGene> genes = this.overlap(interval);
                if (genes.isEmpty())
                    continue;
                for (final KnownGene kg : genes) {
                    final TranscriptInfo trInfo = new TranscriptInfo();
                    trInfo.contig = kg.getContig();
                    trInfo.txStart = kg.getTxStart();
                    trInfo.txEnd = kg.getTxEnd();
                    trInfo.transcriptName = kg.getName();
                    trInfo.strand = (byte) (kg.isPositiveStrand() ? '+' : '-');
                    trInfo.exonCount = kg.getExonCount();
                    trInfo.transcriptLength = kg.getTranscriptLength();
                    trInfo.ctxStart = ctx.getStart();
                    trInfo.ref = ctx.getReference();
                    trInfo.alt = altAllele;
                    trInfo.maf = mafCalculator.getMaf();
                    trInfo.genotypes = new byte[individuals.size()];
                    int idx = 0;
                    for (final Pedigree.Person individual : individuals) {
                        final Genotype genotype = ctx.getGenotype(individual.getId());
                        final byte b;
                        if (genotype.isHomRef()) {
                            b = 0;
                        } else if (genotype.isHomVar() && genotype.getAlleles().contains(altAllele)) {
                            b = 2;
                        } else if (genotype.isHet() && genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            b = 1;
                        } else /* we treat 0/2 has hom-ref */
                        if (genotype.isHet() && !genotype.getAlleles().contains(altAllele) && genotype.getAlleles().contains(ctx.getReference())) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else /* we treat 2/2 has hom-ref */
                        if (genotype.isHomVar() && !genotype.getAlleles().contains(altAllele)) {
                            LOG.warn("Treating " + genotype + " as hom-ref (0) alt=" + altAllele);
                            b = 0;
                        } else {
                            b = -9;
                        }
                        trInfo.genotypes[idx] = b;
                        ++idx;
                    }
                    KnownGene archetype = kg;
                    /* find gene archetype = longest overlapping */
                    for (final KnownGene kg2 : genes) {
                        if (kg2 == kg)
                            continue;
                        if (archetype.getStrand().equals(kg2.getStrand()) && archetype.getTranscriptLength() < kg2.getTranscriptLength()) {
                            archetype = kg2;
                        }
                    }
                    trInfo.archetypeName = archetype.getName();
                    trInfo.archetypeLength = archetype.getTranscriptLength();
                    boolean ctxWasFoundInExon = false;
                    final int ctxPos0 = ctx.getStart() - 1;
                    int indexInTranscript0 = 0;
                    for (final KnownGene.Exon exon : kg.getExons()) {
                        // variant in exon ?
                        if (!(exon.getStart() > (ctx.getEnd() - 1) || (ctx.getStart() - 1) >= exon.getEnd())) {
                            ctxWasFoundInExon = true;
                            indexInTranscript0 += (ctxPos0 - exon.getStart());
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon.getName();
                            sorting.add(trInfo);
                            break;
                        } else {
                            indexInTranscript0 += (exon.getEnd() - exon.getStart());
                        }
                    }
                    if (ctxWasFoundInExon) {
                        continue;
                    }
                    indexInTranscript0 = 0;
                    // search closest intron/exon junction
                    for (int ex = 0; ex + 1 < kg.getExonCount(); ++ex) {
                        final KnownGene.Exon exon1 = kg.getExon(ex);
                        indexInTranscript0 += (exon1.getEnd() - exon1.getStart());
                        final KnownGene.Exon exon2 = kg.getExon(ex + 1);
                        if (exon1.getEnd() <= ctxPos0 && ctxPos0 < exon2.getStart()) {
                            final int dist_to_exon1 = ctxPos0 - exon1.getEnd();
                            final int dist_to_exon2 = exon2.getStart() - ctxPos0;
                            if (dist_to_exon2 < dist_to_exon1) {
                                indexInTranscript0++;
                            }
                            if (kg.isNegativeStrand()) {
                                indexInTranscript0 = (kg.getTranscriptLength() - 1) - indexInTranscript0;
                            }
                            trInfo.indexInTranscript0 = indexInTranscript0;
                            trInfo.overlapName = exon1.getNextIntron().getName();
                            sorting.add(trInfo);
                            break;
                        }
                    }
                }
            // end loop over genes
            }
            // end while loop over variants
            progess.finish();
            sorting.doneAdding();
            LOG.info("done adding");
            iter2 = sorting.iterator();
            final EqualRangeIterator<TranscriptInfo> eqiter = new EqualRangeIterator<TranscriptInfo>(iter2, new Comparator<TranscriptInfo>() {

                @Override
                public int compare(final TranscriptInfo o1, final TranscriptInfo o2) {
                    int i = o1.contig.compareTo(o2.contig);
                    if (i != 0)
                        return i;
                    i = o1.transcriptName.compareTo(o2.transcriptName);
                    return i;
                }
            });
            while (eqiter.hasNext()) {
                final List<TranscriptInfo> list = eqiter.next();
                final TranscriptInfo front = list.get(0);
                pw.println("# BEGIN TRANSCRIPT " + front.transcriptName + " ##########################################");
                pw.println("transcript.chrom <- \"" + front.contig + "\"");
                pw.println("transcript.txStart0 <- " + front.txStart + "");
                pw.println("transcript.txEnd0 <- " + front.txEnd + "");
                pw.println("transcript.name <- \"" + front.transcriptName + "\"");
                pw.println("transcript.strand <- \"" + ((char) front.strand) + "\"");
                pw.println("transcript.length <- " + front.transcriptLength + "");
                pw.println("transcript.exonCount <- " + front.exonCount + "");
                pw.println("archetype.name <- \"" + front.archetypeName + "\"");
                pw.println("archetype.length <- " + front.archetypeLength + "");
                pw.print("variants <- data.frame(chrom=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.contig + "\"");
                    first = false;
                }
                pw.print("),chromStart=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart);
                    first = false;
                }
                pw.print("),chromEnd=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.ctxStart + v.ref.length() - 1);
                    first = false;
                }
                pw.print("),refAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.ref.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),altAllele=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.alt.getDisplayString() + "\"");
                    first = false;
                }
                pw.print("),positionInTranscript1=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.indexInTranscript0 + 1);
                    first = false;
                }
                pw.print("),maf=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print(v.maf);
                    first = false;
                }
                pw.print("),overlapName=c(");
                first = true;
                for (final TranscriptInfo v : list) {
                    if (!first)
                        pw.print(",");
                    pw.print("\"" + v.overlapName + "\"");
                    first = false;
                }
                pw.println("))");
                pw.println("# genotypes as a list. Should be a multiple of length(samples).");
                pw.println("# 0 is homref (0/0), 1 is het (0/1), 2 is homvar (1/1)");
                pw.println("# if the variant contains another ALT allele: (0/2) and (2/2) are considered 0 (homref)");
                pw.print("genotypes <- c(");
                first = true;
                for (final TranscriptInfo tr : list) {
                    for (byte g : tr.genotypes) {
                        if (!first)
                            pw.print(",");
                        first = false;
                        pw.print((int) g);
                    }
                }
                pw.println(")");
                pw.println("stopifnot(NROW(variants) * NROW(population) == length(genotypes) )");
                if (this.userDefinedFunName == null || this.userDefinedFunName.trim().isEmpty()) {
                    pw.println("## WARNING not user-defined R function was defined");
                } else {
                    pw.println("# consumme data with user-defined R function ");
                    pw.println(this.userDefinedFunName + "()");
                }
                pw.println("# END TRANSCRIPT " + front.transcriptName + " ##########################################");
            }
            // end while eqiter
            eqiter.close();
            iter2.close();
            iter2 = null;
            sorting.cleanup();
            sorting = null;
        }
    } finally {
        CloserUtil.close(iter2);
        if (sorting != null)
            sorting.cleanup();
    }
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) TreeSet(java.util.TreeSet) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Genotype(htsjdk.variant.variantcontext.Genotype) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) Interval(htsjdk.samtools.util.Interval)

Example 42 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfAnnotWithBeacon method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, final VcfIterator iter, final VariantContextWriter out) {
    CloseableHttpClient httpClient = null;
    InputStream contentInputStream = null;
    try {
        final org.apache.http.impl.client.HttpClientBuilder hb = HttpClients.custom();
        if (this.ignoreCertErrors) {
            // http://stackoverflow.com/questions/24720013/apache-http-client-ssl-certificate-error
            System.setProperty("jsse.enableSNIExtension", "false");
            final SSLContext sslContext = org.apache.http.conn.ssl.SSLContexts.custom().loadTrustMaterial(null, new org.apache.http.conn.ssl.TrustStrategy() {

                @Override
                public boolean isTrusted(final X509Certificate[] chain, final String authType) throws CertificateException {
                    return true;
                }
            }).useTLS().build();
            final org.apache.http.conn.ssl.SSLConnectionSocketFactory connectionFactory = new org.apache.http.conn.ssl.SSLConnectionSocketFactory(sslContext, new org.apache.http.conn.ssl.AllowAllHostnameVerifier());
            hb.setSSLSocketFactory(connectionFactory);
        }
        httpClient = hb.build();
        HttpGet httpGetRequest = null;
        final Set<String> available_chromosomes = new HashSet<>();
        try {
            httpGetRequest = new HttpGet(baseurl + "/chromosomes");
            httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
            contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
            JsonParser jsonparser = new JsonParser();
            final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
            Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
            while (jsr.hasNext()) {
                final String ctg = jsr.next().getAsString();
                available_chromosomes.add(ctg);
            }
            LOG.debug(available_chromosomes);
        } catch (final Exception err) {
            LOG.error(err);
            return -1;
        } finally {
            CloserUtil.close(contentInputStream);
        }
        final Set<String> available_alleles = new HashSet<>();
        try {
            httpGetRequest = new HttpGet(baseurl + "/alleles");
            httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
            contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
            JsonParser jsonparser = new JsonParser();
            final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
            Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
            while (jsr.hasNext()) {
                final String allele = jsr.next().getAsString();
                available_alleles.add(allele);
            }
            LOG.debug(available_alleles);
        } catch (final Exception err) {
            LOG.error(err);
            return -1;
        } finally {
            CloserUtil.close(contentInputStream);
        }
        final StoredResponseBinding storedResponseBinding = new StoredResponseBinding();
        final VCFHeader header = new VCFHeader(iter.getHeader());
        final VCFInfoHeaderLine infoHeaderLine = new VCFInfoHeaderLine(this.infoTag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Tag inserted with " + getProgramName());
        header.addMetaDataLine(infoHeaderLine);
        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry data = new DatabaseEntry();
        out.writeHeader(header);
        while (iter.hasNext()) {
            final VariantContext ctx = iter.next();
            if (!ctx.isVariant() || ctx.getReference().isSymbolic()) {
                out.add(ctx);
                continue;
            }
            if (ctx.hasAttribute(infoHeaderLine.getID()) && this.dontUpdateIfInfoIsPresent) {
                out.add(ctx);
                continue;
            }
            String beaconContig = ctx.getContig();
            if (!available_chromosomes.contains(beaconContig)) {
                if (beaconContig.startsWith("chr")) {
                    beaconContig = beaconContig.substring(3);
                }
                if (!available_chromosomes.contains(beaconContig)) {
                    out.add(ctx);
                    continue;
                }
            }
            final List<Allele> altAlleles = ctx.getAlternateAlleles();
            if (altAlleles.isEmpty()) {
                out.add(ctx);
                continue;
            }
            final Set<String> newInfo = new HashSet<>();
            for (final Allele alt : altAlleles) {
                if (alt.isSymbolic() || alt.isNoCall())
                    continue;
                final StringBuilder buildUrl = new StringBuilder();
                buildUrl.append("chrom=");
                buildUrl.append(URLEncoder.encode(beaconContig, "UTF-8"));
                buildUrl.append("&pos=");
                /*
					 * "Coordinate within a chromosome. Position is a number and is 0-based"
					 * .
					 */
                buildUrl.append(ctx.getStart() - 1);
                buildUrl.append("&allele=");
                final String allele;
                if (ctx.getReference().length() > alt.length()) {
                    // del
                    allele = "D";
                } else if (ctx.getReference().length() > alt.length()) {
                    // ins
                    allele = "I";
                } else {
                    allele = alt.getDisplayString();
                }
                if (!available_alleles.contains(allele))
                    continue;
                buildUrl.append(allele);
                buildUrl.append("&ref=");
                buildUrl.append(URLEncoder.encode(this.genomeBuild, "UTF-8"));
                final String queryUrl = buildUrl.toString();
                boolean foundInBdb = false;
                Set<String> foundIn = null;
                if (this.beaconDatabase != null) {
                    StringBinding.stringToEntry(queryUrl, key);
                    if (this.beaconDatabase.get(this.txn, key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
                        StoredResponse response = storedResponseBinding.entryToObject(data);
                        if (// TODO check how old is
                        response.timeStamp < 0) // that data
                        {
                            response = null;
                            this.beaconDatabase.delete(this.txn, key);
                        }
                        if (response != null) {
                            foundInBdb = true;
                            foundIn = response.foundIn;
                        }
                    }
                }
                if (foundIn == null) {
                    foundIn = new HashSet<>();
                    try {
                        httpGetRequest = new HttpGet(baseurl + "/responses?" + queryUrl);
                        httpGetRequest.setHeader("Accept", ContentType.APPLICATION_JSON.getMimeType());
                        LOG.debug(httpGetRequest.getURI());
                        contentInputStream = httpClient.execute(httpGetRequest).getEntity().getContent();
                        JsonParser jsonparser = new JsonParser();
                        final JsonElement root = jsonparser.parse(new InputStreamReader(contentInputStream));
                        Iterator<JsonElement> jsr = root.getAsJsonArray().iterator();
                        while (jsr.hasNext()) {
                            final JsonObject b = jsr.next().getAsJsonObject();
                            if (!(b.has("beacon") && b.has("response")))
                                continue;
                            final String beacon_id = b.get("beacon").getAsJsonObject().get("id").getAsString();
                            final JsonElement response_prim = b.get("response");
                            if (response_prim.isJsonPrimitive() && response_prim.getAsBoolean()) {
                                foundIn.add(beacon_id);
                            }
                        }
                    } catch (final Exception err) {
                        LOG.error(err);
                        if (stopOnNetworkError) {
                            throw new RuntimeIOException(err);
                        }
                    } finally {
                        CloserUtil.close(contentInputStream);
                    }
                }
                if (this.beaconDatabase != null && !foundInBdb) {
                    StoredResponse response = new StoredResponse();
                    response.timeStamp = System.currentTimeMillis();
                    response.foundIn = foundIn;
                }
                // 17&pos=41244981&=G&ref=GRCh37")
                newInfo.addAll(foundIn.stream().map(S -> alt.getDisplayString() + "|" + S).collect(Collectors.toSet()));
            }
            if (newInfo.isEmpty()) {
                out.add(ctx);
                continue;
            }
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(infoHeaderLine.getID(), new ArrayList<String>(newInfo));
            out.add(vcb.make());
        }
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(httpClient);
    }
}
Also used : HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) JsonObject(com.google.gson.JsonObject) CertificateException(java.security.cert.CertificateException) DatabaseEntry(com.sleepycat.je.DatabaseEntry) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) JsonParser(com.google.gson.JsonParser) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) SSLContext(javax.net.ssl.SSLContext) X509Certificate(java.security.cert.X509Certificate) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) CertificateException(java.security.cert.CertificateException) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) JsonElement(com.google.gson.JsonElement)

Example 43 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class EigenVariants method initialize.

@Override
public void initialize() {
    logger.info("opening eigen directory :" + eigenDir);
    IOUtil.assertDirectoryIsReadable(eigenDir);
    this.annotator = new EigenInfoAnnotator(this.eigenDir);
    if (this.tabixPrefix != null) {
        logger.info("changing eigen file prefix from  :" + this.annotator.getTabixPrefix() + " to " + this.tabixPrefix);
        this.annotator.setTabixFilePrefix(this.tabixPrefix);
    }
    final VCFHeader header1 = super.getVcfHeader();
    final VCFHeader h2 = new VCFHeader(header1);
    for (final VCFInfoHeaderLine vihl : this.annotator.getInfoHeaderLines()) {
        if (h2.getInfoHeaderLine(vihl.getID()) != null) {
            throw new UserException.MalformedVCFHeader("VCF INFO " + vihl.getID() + " already defined in input VCF.");
        }
        h2.addMetaDataLine(vihl);
    }
    super.writer.writeHeader(h2);
    super.initialize();
}
Also used : EigenInfoAnnotator(com.github.lindenb.jvarkit.tools.vcfeigen.EigenInfoAnnotator) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 44 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class SoftClipAnnotator method initialize.

public void initialize() {
    final Set<VCFHeaderLine> hInfo = new HashSet<>();
    final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
    final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
    for (final String sample : samples) {
        this.sample2bam.put(sample, new ArrayList<>());
    }
    for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
        if (isUniqueHeaderLine(line, hInfo))
            hInfo.add(line);
    }
    VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    VCFHeader header2 = new VCFHeader(vcfHeader);
    header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
    header2.addMetaDataLine(this.filterHaveClipInVariant);
    vcfWriter.writeHeader(header2);
    final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
        logger.info("opening " + samFilename);
        final SamReader r = srf.open(samFilename);
        final Set<String> sampleset = new HashSet<>();
        for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
            if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
                continue;
            sampleset.add(g.getSample());
        }
        if (sampleset.isEmpty()) {
            logger.info("no VCF sample in " + samFilename);
            CloserUtil.close(r);
            continue;
        }
        this.samReaders.add(r);
        for (final String sample : sampleset) {
            if (!this.sample2bam.containsKey(sample))
                continue;
            this.sample2bam.get(sample).add(r);
        }
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File) HashSet(java.util.HashSet)

Example 45 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class WindowVariants method initialize.

@Override
public void initialize() {
    if (this.window_size <= 0) {
        throw new UserException("Bad window size.");
    }
    if (this.window_shift <= 0) {
        throw new UserException("Bad window shift.");
    }
    if (this.winName == null || this.winName.isEmpty()) {
        throw new UserException("Bad INFO ID windowName");
    }
    final Map<String, String> exprMap = new HashMap<>();
    for (final String expStr : this.selectExpressions) {
        exprMap.put("expr" + (1 + exprMap.size()), expStr);
    }
    this.jexls = VariantContextUtils.initializeMatchExps(exprMap);
    final VCFHeader header = new VCFHeader(super.getVcfHeader());
    if (header.getInfoHeaderLine(this.winName) != null) {
        throw new UserException("VCF header already contains the INFO header ID=" + this.winName);
    }
    header.addMetaDataLine(new VCFInfoHeaderLine(this.winName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Window : start|end|number-of-matching-variants|number-of-non-matching-variants"));
    super.writer.writeHeader(header);
    super.initialize();
}
Also used : HashMap(java.util.HashMap) UserException(org.broadinstitute.gatk.utils.exceptions.UserException) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34