Search in sources :

Example 46 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class MiniCaller method doWork.

@Override
public int doWork(final List<String> args) {
    ConcatSam.ConcatSamIterator iter = null;
    try {
        if (this.fastaFile == null) {
            LOG.error("no REF");
            return -1;
        }
        /* load faid */
        final ReferenceGenomeFactory referenceGenomeFactory = new ReferenceGenomeFactory();
        this.referenceGenome = referenceGenomeFactory.openFastaFile(this.fastaFile);
        this.dictionary = this.referenceGenome.getDictionary();
        if (this.dictionary == null) {
            LOG.error(JvarkitException.FastaDictionaryMissing.getMessage(this.fastaFile.getPath()));
        }
        /* create sam record iterator */
        iter = new ConcatSam.Factory().addInterval(this.rgnStr).setEnableUnrollList(true).open(args);
        final SAMFileHeader samFileheader = iter.getFileHeader();
        final SAMSequenceDictionary dict = samFileheader.getSequenceDictionary();
        if (dict == null) {
            LOG.error(JvarkitException.BamDictionaryMissing.getMessage(String.join(", ", args)));
            return -1;
        }
        if (!SequenceUtil.areSequenceDictionariesEqual(dict, this.dictionary)) {
            LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(dict, this.dictionary));
            return -1;
        }
        final List<SAMReadGroupRecord> groups = samFileheader.getReadGroups();
        if (groups == null || groups.isEmpty()) {
            LOG.error("No group defined in input");
            return -1;
        }
        final Set<String> sampleSet = groups.stream().map(srgr -> this.samRecordPartition.apply(srgr, samRecordPartition.name())).collect(Collectors.toSet());
        /* create VCF metadata */
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
        metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
        metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
        metaData.add(new VCFFormatHeaderLine("DPG", // one value of each genotype
        VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Depth for each allele"));
        metaData.add(new VCFFormatHeaderLine("DP4", 4, VCFHeaderLineType.Integer, "Depth ReforAlt|Strand : RF,RR,AF,AR"));
        metaData.add(new VCFInfoHeaderLine("INDEL", 1, VCFHeaderLineType.Flag, "Variant is indel"));
        // addMetaData(metaData);
        final VCFHeader vcfHeader = new VCFHeader(metaData, sampleSet);
        vcfHeader.setSequenceDictionary(this.dictionary);
        /* create variant context */
        this.variantContextWriter = super.openVariantContextWriter(outputFile);
        this.variantContextWriter.writeHeader(vcfHeader);
        ReferenceContig genomicSeq = null;
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
        for (; ; ) {
            SAMRecord rec = null;
            if (iter.hasNext()) {
                rec = progress.watch(iter.next());
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.readFilter.filterOut(rec))
                    continue;
                /* flush buffer if needed */
                while (!this.buffer.isEmpty() && (this.buffer.get(0).tid < rec.getReferenceIndex() || (this.buffer.get(0).tid == rec.getReferenceIndex() && (this.buffer.get(0).getEnd()) < rec.getAlignmentStart()))) {
                    this.buffer.remove(0).print();
                }
                /* get genomic sequence at this position */
                if (genomicSeq == null || !genomicSeq.getContig().equals(rec.getContig())) {
                    genomicSeq = this.referenceGenome.getContig(rec.getContig());
                }
                final Cigar cigar = rec.getCigar();
                if (cigar == null)
                    continue;
                int readPos = 0;
                // 0 based-reference
                int refPos0 = rec.getAlignmentStart() - 1;
                final byte[] bases = rec.getReadBases();
                final byte[] quals = rec.getBaseQualities();
                final String sampleName = this.samRecordPartition.getPartion(rec, samRecordPartition.name());
                for (final CigarElement ce : cigar.getCigarElements()) {
                    final CigarOperator op = ce.getOperator();
                    switch(op) {
                        case P:
                            break;
                        case H:
                            break;
                        case S:
                            readPos += ce.getLength();
                            break;
                        // go
                        case N:
                        case D:
                            {
                                if (// we need base before deletion
                                refPos0 > 0) {
                                    char refBase = genomicSeq.charAt(refPos0 - 1);
                                    /* we use base before deletion */
                                    final StringBuilder sb = new StringBuilder(ce.getLength());
                                    sb.append(refBase);
                                    for (int i = 0; i < ce.getLength(); ++i) {
                                        sb.append(genomicSeq.charAt(refPos0 + i));
                                    }
                                    findContext(rec.getReferenceIndex(), // we use base *before deletion */
                                    refPos0 - 1, Allele.create(sb.toString(), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf(refBase), false)).incr(rec.getReadNegativeStrandFlag());
                                }
                                refPos0 += ce.getLength();
                                break;
                            }
                        case I:
                            {
                                if (refPos0 > 0) {
                                    // float qual=0;
                                    char refBase = Character.toUpperCase(genomicSeq.charAt(refPos0 - 1));
                                    final StringBuilder sb = new StringBuilder(1 + ce.getLength());
                                    sb.append(refBase);
                                    for (int i = 0; i < ce.getLength(); ++i) {
                                        sb.append((char) bases[readPos + i]);
                                    // qual+=(readPos + i < quals.length?quals[ readPos + i]:0);
                                    }
                                    findContext(rec.getReferenceIndex(), // we use base *before deletion */
                                    refPos0 - 1, Allele.create(String.valueOf(refBase), true)).getSample(sampleName).getAllele(Allele.create(sb.toString().toUpperCase(), false)).incr(rec.getReadNegativeStrandFlag());
                                }
                                readPos += ce.getLength();
                                break;
                            }
                        case EQ:
                        case M:
                        case X:
                            {
                                for (int i = 0; i < ce.getLength(); ++i) {
                                    findContext(rec.getReferenceIndex(), refPos0 + i, Allele.create(String.valueOf(genomicSeq.charAt(refPos0 + i)), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf((char) bases[readPos + i]), false)).incr(rec.getReadNegativeStrandFlag());
                                }
                                readPos += ce.getLength();
                                refPos0 += ce.getLength();
                                break;
                            }
                        default:
                            throw new IllegalStateException("Case statement didn't deal with cigar op: " + op);
                    }
                }
            } else {
                break;
            }
        }
        while (!buffer.isEmpty()) buffer.remove(0).print();
        progress.finish();
        iter.close();
        iter = null;
        this.variantContextWriter.close();
        this.variantContextWriter = null;
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(this.referenceGenome);
        CloserUtil.close(this.variantContextWriter);
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) VCFStandardHeaderLines(htsjdk.variant.vcf.VCFStandardHeaderLines) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) Map(java.util.Map) CloserUtil(htsjdk.samtools.util.CloserUtil) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Cigar(htsjdk.samtools.Cigar) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Parameter(com.beust.jcommander.Parameter) HashMap(java.util.HashMap) Term(com.github.lindenb.semontology.Term) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFConstants(htsjdk.variant.vcf.VCFConstants) Counter(com.github.lindenb.jvarkit.util.Counter) Locatable(htsjdk.samtools.util.Locatable) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) File(java.io.File) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) ConcatSam(com.github.lindenb.jvarkit.tools.misc.ConcatSam) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) ReferenceGenome(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenome) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ConcatSam(com.github.lindenb.jvarkit.tools.misc.ConcatSam)

Example 47 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfRegistryCGI method doWork.

private void doWork(XMLStreamWriter w, final GroupFile gf) throws XMLStreamException {
    Position pos = parsePosition();
    if (pos == null)
        return;
    w.writeStartElement("div");
    w.writeStartElement("h2");
    w.writeCharacters(pos.chrom + ":" + pos.pos);
    w.writeEndElement();
    w.writeStartElement("table");
    w.writeStartElement("thead");
    w.writeStartElement("tr");
    for (String header : new String[] { "CHROM", "POS", "ID", "REF", "QUAL", "Sample", "Alleles", "DP", "GQ", "File" }) {
        w.writeStartElement("th");
        w.writeCharacters(header);
        // td
        w.writeEndElement();
    }
    // tr
    w.writeEndElement();
    // thead
    w.writeEndElement();
    w.writeStartElement("tbody");
    Set<String> samplesWithGenotypes = new HashSet<String>();
    Set<String> allSamples = new HashSet<String>();
    for (VcfFile f : getVcfFiles(gf)) {
        TabixReader tabixReader = null;
        TabixReader.Iterator iter = null;
        BlockCompressedInputStream bgzin = null;
        VCFHeader header = null;
        AbstractVCFCodec vcfCodec = VCFUtils.createDefaultVCFCodec();
        LineIterator lineIterator = null;
        for (int i = 0; i < 2; i++) {
            try {
                if (i == 0) {
                    bgzin = new BlockCompressedInputStream(f.file);
                    lineIterator = new LineIteratorImpl(new SynchronousLineReader(bgzin));
                    header = (VCFHeader) vcfCodec.readActualHeader(lineIterator);
                    allSamples.addAll(header.getGenotypeSamples());
                } else {
                    tabixReader = new TabixReader(f.file.getPath());
                    String line;
                    int[] x = tabixReader.parseReg(pos.chrom + ":" + pos.pos + "-" + (pos.pos));
                    if (x != null && x.length > 2 && x[0] != -1) {
                        iter = tabixReader.query(x[0], x[1], x[2]);
                    } else {
                    }
                    while (iter != null && (line = iter.next()) != null) {
                        VariantContext var = vcfCodec.decode(line);
                        for (String sample : header.getSampleNamesInOrder()) {
                            final Genotype genotype = var.getGenotype(sample);
                            if (genotype == null || !genotype.isCalled())
                                continue;
                            if (!genotype.isAvailable())
                                continue;
                            samplesWithGenotypes.add(sample);
                            w.writeStartElement("tr");
                            w.writeStartElement("td");
                            w.writeCharacters(var.getContig());
                            w.writeEndElement();
                            w.writeStartElement("td");
                            w.writeCharacters(String.valueOf(var.getStart()));
                            w.writeEndElement();
                            if (var.hasID()) {
                                w.writeStartElement("td");
                                if (var.getID().matches("rs[0-9]+")) {
                                    w.writeStartElement("a");
                                    w.writeAttribute("href", "http://www.ncbi.nlm.nih.gov/snp/" + var.getID().substring(2));
                                    w.writeCharacters(var.getID());
                                    // a
                                    w.writeEndElement();
                                } else {
                                    w.writeCharacters(var.getID());
                                }
                                // td
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.getReference() != null) {
                                w.writeStartElement("td");
                                w.writeCharacters(var.getReference().getBaseString());
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (var.hasLog10PError()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf((int) var.getPhredScaledQual()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(sample);
                            w.writeEndElement();
                            List<Allele> alleles = genotype.getAlleles();
                            w.writeStartElement("td");
                            w.writeStartElement("span");
                            if (genotype.isHomRef()) {
                                w.writeAttribute("style", "color:green;");
                            } else if (genotype.isHomVar()) {
                                w.writeAttribute("style", "color:red;");
                            } else if (genotype.isHet()) {
                                w.writeAttribute("style", "color:blue;");
                            }
                            for (int j = 0; j < alleles.size(); ++j) {
                                if (j > 0)
                                    w.writeCharacters(genotype.isPhased() ? "|" : "/");
                                w.writeCharacters(alleles.get(j).getBaseString());
                            }
                            // span
                            w.writeEndElement();
                            w.writeEndElement();
                            if (genotype.hasDP()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getDP()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            if (genotype.hasGQ()) {
                                w.writeStartElement("td");
                                w.writeCharacters(String.valueOf(genotype.getGQ()));
                                w.writeEndElement();
                            } else {
                                w.writeEmptyElement("td");
                            }
                            w.writeStartElement("td");
                            w.writeCharacters(f.file.getName());
                            w.writeEndElement();
                            // tr
                            w.writeEndElement();
                            w.flush();
                        }
                    }
                }
            } catch (Exception err) {
                w.writeComment("BOUM " + err);
                header = null;
                lastException = err;
            } finally {
                CloserUtil.close(lineIterator);
                CloserUtil.close(bgzin);
                CloserUtil.close(tabixReader);
                CloserUtil.close(iter);
            }
            if (i == 0 && header == null)
                break;
        }
        w.flush();
    }
    // tbody
    w.writeEndElement();
    // table
    w.writeEndElement();
    allSamples.removeAll(samplesWithGenotypes);
    if (!allSamples.isEmpty()) {
        w.writeStartElement("h3");
        w.writeCharacters("Samples not found");
        w.writeEndElement();
        w.writeStartElement("ol");
        for (String sample : new TreeSet<String>(allSamples)) {
            w.writeStartElement("li");
            w.writeCharacters(sample);
            w.writeEndElement();
        }
        w.writeEndElement();
    }
    writeHTMLException(w, this.lastException);
    // div
    w.writeEndElement();
}
Also used : TabixReader(htsjdk.tribble.readers.TabixReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) Genotype(htsjdk.variant.variantcontext.Genotype) AbstractVCFCodec(htsjdk.variant.vcf.AbstractVCFCodec) LineIterator(htsjdk.tribble.readers.LineIterator) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) Allele(htsjdk.variant.variantcontext.Allele) TreeSet(java.util.TreeSet) SynchronousLineReader(htsjdk.tribble.readers.SynchronousLineReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream) HashSet(java.util.HashSet)

Example 48 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class CaseControlPlot method doWork.

@Override
public int doWork(final List<String> args) {
    ArchiveFactory archiveFactory = null;
    VcfIterator in = null;
    VariantContextWriter teeVariantWriter = null;
    final List<CaseControlExtractor> excractors = new ArrayList<>();
    try {
        in = super.openVcfIterator(oneFileOrNull(args));
        final VCFHeader header = in.getHeader();
        excractors.addAll(parseConfigFile(header));
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = Pedigree.newParser().parse(this.pedigreeFile);
        } else {
            pedigree = Pedigree.newParser().parse(header);
        }
        if (pedigree == null || pedigree.isEmpty()) {
            LOG.error("No pedigree defined , or it is empty");
            return -1;
        }
        final Set<Pedigree.Person> casepersons = pedigree.getPersons().stream().filter(F -> F.isAffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (casepersons.isEmpty()) {
            LOG.error("No Affected individuals in pedigree/header");
            return -1;
        }
        final Set<Pedigree.Person> controlpersons = pedigree.getPersons().stream().filter(F -> F.isUnaffected() && header.getSampleNamesInOrder().indexOf(F.getId()) != -1).collect(Collectors.toSet());
        if (controlpersons.isEmpty()) {
            LOG.error("No Unaffected individuals in pedigree/header");
            return -1;
        }
        if (this.teeToStdout) {
            teeVariantWriter = super.openVariantContextWriter(null);
            teeVariantWriter.writeHeader(header);
        }
        archiveFactory = ArchiveFactory.open(this.outputDirOrZip);
        for (final CaseControlExtractor extractor : excractors) {
            extractor.pw = archiveFactory.openWriter(extractor.name);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (in.hasNext()) {
            final VariantContext ctx = progress.watch(in.next());
            if (teeVariantWriter != null)
                teeVariantWriter.add(ctx);
            for (final CaseControlExtractor handler : excractors) {
                handler.visit(ctx, casepersons, controlpersons);
            }
        }
        for (final CaseControlExtractor handler : excractors) {
            handler.close();
        }
        progress.finish();
        if (teeVariantWriter != null) {
            teeVariantWriter.close();
            teeVariantWriter = null;
        }
        in.close();
        in = null;
        archiveFactory.close();
        archiveFactory = null;
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(archiveFactory);
        CloserUtil.close(teeVariantWriter);
        for (final CaseControlExtractor handler : excractors) {
            handler.close();
        }
    }
}
Also used : Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) Document(org.w3c.dom.Document) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) Map(java.util.Map) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) ScriptException(javax.script.ScriptException) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) SimpleBindings(javax.script.SimpleBindings) List(java.util.List) Element(org.w3c.dom.Element) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VcfTools(com.github.lindenb.jvarkit.util.vcf.VcfTools) VariantContext(htsjdk.variant.variantcontext.VariantContext) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) ArchiveFactory(com.github.lindenb.jvarkit.io.ArchiveFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) ScriptException(javax.script.ScriptException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Pedigree(com.github.lindenb.jvarkit.util.Pedigree) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 49 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class VcfBurdenFilterGenes method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator in, final VariantContextWriter out) {
    final VCFHeader header = in.getHeader();
    try {
        final VCFHeader h2 = addMetaData(new VCFHeader(header));
        final VCFFilterHeaderLine filterControlsHeader;
        if (!StringUtil.isBlank(this.filterTag)) {
            filterControlsHeader = new VCFFilterHeaderLine(this.filterTag.trim(), "Genes not in list " + this.geneFile);
            h2.addMetaDataLine(filterControlsHeader);
        } else {
            filterControlsHeader = null;
        }
        final List<String> lookColumns = Arrays.asList("CCDS", "Feature", "ENSP", "Gene", "HGNC", "HGNC_ID", "SYMBOL", "RefSeq");
        final VepPredictionParser vepParser = new VepPredictionParserFactory(header).get();
        final AnnPredictionParser annParser = new AnnPredictionParserFactory(header).get();
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(header.getSequenceDictionary()).logger(LOG);
        out.writeHeader(h2);
        while (in.hasNext() && !out.checkError()) {
            final VariantContext ctx = progess.watch(in.next());
            boolean keep = false;
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            // not just set FILTER ?
            if (filterControlsHeader == null) {
                vcb.rmAttribute(vepParser.getTag());
                vcb.rmAttribute(annParser.getTag());
            }
            final List<String> newVepList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(vepParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final VepPredictionParser.VepPrediction pred = vepParser.parseOnePrediction(ctx, predStr);
                for (final String col : lookColumns) {
                    final String token = pred.getByCol(col);
                    if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                        newVepList.add(predStr);
                        keep = true;
                        break;
                    }
                }
            }
            final List<String> newEffList = new ArrayList<>();
            for (final String predStr : ctx.getAttributeAsList(annParser.getTag()).stream().map(O -> String.class.cast(O)).collect(Collectors.toList())) {
                final AnnPredictionParser.AnnPrediction pred = annParser.parseOnePrediction(predStr);
                final String token = pred.getGeneName();
                if (!StringUtil.isBlank(token) && this.geneNames.contains(token)) {
                    newEffList.add(predStr);
                    keep = true;
                    break;
                }
            }
            // not just set FILTER ?
            if (filterControlsHeader == null) {
                if (!newVepList.isEmpty())
                    vcb.attribute(vepParser.getTag(), newVepList);
                if (!newEffList.isEmpty())
                    vcb.attribute(annParser.getTag(), newEffList);
            }
            if (filterControlsHeader != null) {
                if (!keep) {
                    vcb.filter(filterControlsHeader.getID());
                } else if (!ctx.isFiltered()) {
                    vcb.passFilters();
                }
                out.add(vcb.make());
            } else {
                if (keep)
                    out.add(vcb.make());
            }
        }
        progess.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(in);
    }
}
Also used : AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) Arrays(java.util.Arrays) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) AnnPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParser) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StringUtil(htsjdk.samtools.util.StringUtil) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Files(java.nio.file.Files) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VepPredictionParser(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParser) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) AnnPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.AnnPredictionParserFactory) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) VCFHeader(htsjdk.variant.vcf.VCFHeader) VepPredictionParserFactory(com.github.lindenb.jvarkit.util.vcf.predictions.VepPredictionParserFactory)

Example 50 with VCFHeader

use of htsjdk.variant.vcf.VCFHeader in project jvarkit by lindenb.

the class EvsToVcf method doWork.

@Override
public int doWork(List<String> args) {
    VariantContextWriter out = null;
    try {
        if (!args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        JAXBContext jc = JAXBContext.newInstance(SnpData.class);
        Unmarshaller unmarshaller = jc.createUnmarshaller();
        out = VCFUtils.createVariantContextWriterToStdout();
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        _fillDict(dict, "1", 249250621);
        _fillDict(dict, "2", 243199373);
        _fillDict(dict, "3", 198022430);
        _fillDict(dict, "4", 191154276);
        _fillDict(dict, "5", 180915260);
        _fillDict(dict, "6", 171115067);
        _fillDict(dict, "7", 159138663);
        _fillDict(dict, "8", 146364022);
        _fillDict(dict, "9", 141213431);
        _fillDict(dict, "10", 135534747);
        _fillDict(dict, "11", 135006516);
        _fillDict(dict, "12", 133851895);
        _fillDict(dict, "13", 115169878);
        _fillDict(dict, "14", 107349540);
        _fillDict(dict, "15", 102531392);
        _fillDict(dict, "16", 90354753);
        _fillDict(dict, "17", 81195210);
        _fillDict(dict, "18", 78077248);
        _fillDict(dict, "19", 59128983);
        _fillDict(dict, "20", 63025520);
        _fillDict(dict, "21", 48129895);
        _fillDict(dict, "22", 51304566);
        _fillDict(dict, "X", 155270560);
        _fillDict(dict, "Y", 59373566);
        _fillDict(dict, "MT", 16569);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        header.addMetaDataLine(new VCFInfoHeaderLine("CONS", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScore"));
        header.addMetaDataLine(new VCFInfoHeaderLine("GERP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("uaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("aaMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("totalMAF", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("DP", VCFHeaderLineCount.INTEGER, VCFHeaderLineType.Integer, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        out.writeHeader(header);
        Pattern comma = Pattern.compile("[,]");
        XMLInputFactory xif = XMLInputFactory.newFactory();
        xif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
        XMLEventReader xmlr = xif.createXMLEventReader(System.in);
        while (xmlr.hasNext() && !System.out.checkError()) {
            XMLEvent evt = xmlr.peek();
            if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                xmlr.nextEvent();
                continue;
            }
            SnpData snpData = unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
            VariantContextBuilder vcb = new VariantContextBuilder();
            Set<Allele> alleles = new HashSet<Allele>();
            alleles.add(Allele.create(snpData.getRefAllele(), true));
            for (String s : comma.split(snpData.getAltAlleles())) {
                if (isEmpty(s))
                    continue;
                alleles.add(Allele.create(s, false));
            }
            vcb.chr(snpData.getChromosome());
            vcb.start(snpData.getChrPosition());
            vcb.stop(snpData.getChrPosition() + snpData.getRefAllele().length() - 1);
            if (!isEmpty(snpData.getRsIds()) && !snpData.getRsIds().equals("none")) {
                vcb.id(snpData.getRsIds());
            }
            vcb.alleles(alleles);
            Float d = parseDouble(snpData.getConservationScore());
            if (d != null) {
                vcb.attribute("CONS", d);
            }
            d = parseDouble(snpData.getConservationScoreGERP());
            if (d != null) {
                vcb.attribute("GERP", d);
            }
            vcb.attribute("uaMAF", (float) snpData.getUaMAF());
            vcb.attribute("aaMAF", (float) snpData.getAaMAF());
            vcb.attribute("totalMAF", (float) snpData.getTotalMAF());
            vcb.attribute("DP", snpData.getAvgSampleReadDepth());
            out.add(vcb.make());
        }
        xmlr.close();
        out.close();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : Pattern(java.util.regex.Pattern) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) JAXBContext(javax.xml.bind.JAXBContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) XMLEvent(javax.xml.stream.events.XMLEvent) XMLEventReader(javax.xml.stream.XMLEventReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) SnpData(edu.washington.gs.evs.SnpData) Unmarshaller(javax.xml.bind.Unmarshaller) VCFHeader(htsjdk.variant.vcf.VCFHeader) XMLInputFactory(javax.xml.stream.XMLInputFactory) HashSet(java.util.HashSet)

Aggregations

VCFHeader (htsjdk.variant.vcf.VCFHeader)182 VariantContext (htsjdk.variant.variantcontext.VariantContext)113 File (java.io.File)93 ArrayList (java.util.ArrayList)79 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)73 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)64 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)63 HashSet (java.util.HashSet)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)58 IOException (java.io.IOException)55 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)52 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)49 Genotype (htsjdk.variant.variantcontext.Genotype)48 Allele (htsjdk.variant.variantcontext.Allele)47 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)47 List (java.util.List)44 Set (java.util.Set)38 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)36 CloserUtil (htsjdk.samtools.util.CloserUtil)35 Collectors (java.util.stream.Collectors)34