Search in sources :

Example 1 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project hmftools by hartwigmedical.

the class BreakPointInspectorApplication method main.

public static void main(final String... args) throws IOException {
    final AnalysisBuilder analysisBuilder = new AnalysisBuilder();
    final Options options = createOptions();
    try {
        final CommandLine cmd = createCommandLine(options, args);
        final String refPath = cmd.getOptionValue(REF_PATH);
        final String refSlicePath = cmd.getOptionValue(REF_SLICE);
        final String tumorPath = cmd.getOptionValue(TUMOR_PATH);
        final String tumorSlicePath = cmd.getOptionValue(TUMOR_SLICE);
        final String vcfPath = cmd.getOptionValue(VCF);
        if (cmd.hasOption(PROXIMITY)) {
            analysisBuilder.setRange(Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500")));
        }
        if (cmd.hasOption(CONTAMINATION)) {
            analysisBuilder.setContaminationFraction(Float.parseFloat(cmd.getOptionValue(CONTAMINATION, "0")));
        }
        if (refPath == null || tumorPath == null || vcfPath == null) {
            printHelpAndExit(options);
            return;
        }
        final File tumorBAM = new File(tumorPath);
        final SamReader tumorReader = SamReaderFactory.makeDefault().open(tumorBAM);
        final File refBAM = new File(refPath);
        final SamReader refReader = SamReaderFactory.makeDefault().open(refBAM);
        final File vcfFile = new File(vcfPath);
        final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
        final List<String> samples = vcfReader.getFileHeader().getGenotypeSamples();
        if (samples.size() != 2) {
            System.err.println("could not determine tumor and sample from VCF");
            System.exit(1);
            return;
        }
        TSVOutput.PrintHeaders();
        final Analysis analysis = analysisBuilder.setRefReader(refReader).setTumorReader(tumorReader).createAnalysis();
        final List<QueryInterval> combinedQueryIntervals = Lists.newArrayList();
        final Map<String, VariantContext> variantMap = new HashMap<>();
        final List<VariantContext> variants = Lists.newArrayList();
        for (VariantContext variant : vcfReader) {
            variantMap.put(variant.getID(), variant);
            final VariantContext mateVariant = variant;
            if (variant.hasAttribute("MATEID")) {
                variant = variantMap.get(variant.getAttributeAsString("MATEID", ""));
                if (variant == null) {
                    continue;
                }
            }
            final String location = variant.getContig() + ":" + Integer.toString(variant.getStart());
            final Location location1 = Location.parseLocationString(location, tumorReader.getFileHeader().getSequenceDictionary());
            final Range uncertainty1 = extractCIPOS(variant);
            final List<Integer> CIEND = variant.getAttributeAsIntList("CIEND", 0);
            Range uncertainty2 = CIEND.size() == 2 ? new Range(CIEND.get(0), CIEND.get(1)) : null;
            final boolean IMPRECISE = variant.hasAttribute("IMPRECISE");
            HMFVariantType svType;
            final Location location2;
            switch(variant.getStructuralVariantType()) {
                case INS:
                    svType = HMFVariantType.INS;
                    location2 = location1.set(variant.getAttributeAsInt("END", 0));
                    break;
                case INV:
                    if (variant.hasAttribute("INV3")) {
                        svType = HMFVariantType.INV3;
                    } else if (variant.hasAttribute("INV5")) {
                        svType = HMFVariantType.INV5;
                    } else {
                        System.err.println(variant.getID() + " : expected either INV3 or INV5 flag");
                        continue;
                    }
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case DEL:
                    svType = HMFVariantType.DEL;
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case DUP:
                    svType = HMFVariantType.DUP;
                    location2 = location1.add(Math.abs(variant.getAttributeAsInt("SVLEN", 0)));
                    break;
                case BND:
                    // process the breakend string
                    final String call = variant.getAlternateAllele(0).getDisplayString();
                    final String[] leftSplit = call.split("\\]");
                    final String[] rightSplit = call.split("\\[");
                    if (leftSplit.length >= 2) {
                        location2 = Location.parseLocationString(leftSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
                        if (leftSplit[0].length() > 0) {
                            svType = HMFVariantType.INV3;
                            uncertainty2 = Range.invert(uncertainty1);
                        } else {
                            svType = HMFVariantType.DUP;
                            uncertainty2 = uncertainty1;
                        }
                    } else if (rightSplit.length >= 2) {
                        location2 = Location.parseLocationString(rightSplit[1], tumorReader.getFileHeader().getSequenceDictionary());
                        if (rightSplit[0].length() > 0) {
                            svType = HMFVariantType.DEL;
                            uncertainty2 = uncertainty1;
                        } else {
                            svType = HMFVariantType.INV5;
                            uncertainty2 = Range.invert(uncertainty1);
                        }
                    } else {
                        System.err.println(variant.getID() + " : could not parse breakpoint");
                        continue;
                    }
                    if (IMPRECISE) {
                        uncertainty2 = extractCIPOS(mateVariant);
                    }
                    break;
                default:
                    System.err.println(variant.getID() + " : UNEXPECTED SVTYPE=" + variant.getStructuralVariantType());
                    continue;
            }
            final HMFVariantContext ctx = new HMFVariantContext(variant.getID(), location1, location2, svType, IMPRECISE);
            ctx.Filter.addAll(variant.getFilters().stream().filter(s -> !s.startsWith("BPI")).collect(Collectors.toSet()));
            ctx.Uncertainty1 = uncertainty1;
            ctx.Uncertainty2 = ObjectUtils.firstNonNull(uncertainty2, fixup(uncertainty1, IMPRECISE, svType == HMFVariantType.INV3 || svType == HMFVariantType.INV5));
            ctx.HomologySequence = variant.getAttributeAsString("HOMSEQ", "");
            if (variant.hasAttribute("LEFT_SVINSSEQ") && variant.hasAttribute("RIGHT_SVINSSEQ")) {
                ctx.InsertSequence = variant.getAttributeAsString("LEFT_SVINSSEQ", "") + "..." + variant.getAttributeAsString("RIGHT_SVINSSEQ", "");
            } else {
                ctx.InsertSequence = variant.getAttributeAsString("SVINSSEQ", "");
            }
            ctx.BND = variant.getStructuralVariantType() == StructuralVariantType.BND;
            switch(ctx.Type) {
                case INS:
                case DEL:
                    ctx.OrientationBP1 = 1;
                    ctx.OrientationBP2 = -1;
                    break;
                case INV3:
                    ctx.OrientationBP1 = 1;
                    ctx.OrientationBP2 = 1;
                    break;
                case INV5:
                    ctx.OrientationBP1 = -1;
                    ctx.OrientationBP2 = -1;
                    break;
                case DUP:
                    ctx.OrientationBP1 = -1;
                    ctx.OrientationBP2 = 1;
                    break;
            }
            final StructuralVariantResult result = analysis.processStructuralVariant(ctx);
            combinedQueryIntervals.addAll(asList(result.QueryIntervals));
            TSVOutput.print(variant, ctx, result);
            final BiConsumer<VariantContext, Boolean> vcfUpdater = (v, swap) -> {
                final Set<String> filters = v.getCommonInfo().getFiltersMaybeNull();
                if (filters != null) {
                    filters.clear();
                }
                // we will map BreakpointError to a flag
                if (result.Filters.contains(Filter.Filters.BreakpointError.toString())) {
                    v.getCommonInfo().putAttribute("BPI_AMBIGUOUS", true, true);
                } else {
                    v.getCommonInfo().addFilters(result.Filters);
                }
                if (result.Filters.isEmpty()) {
                    final List<Double> af = asList(result.AlleleFrequency.getLeft(), result.AlleleFrequency.getRight());
                    v.getCommonInfo().putAttribute(AlleleFrequency.VCF_INFO_TAG, swap ? Lists.reverse(af) : af, true);
                }
                if (result.Breakpoints.getLeft() != null) {
                    v.getCommonInfo().putAttribute(swap ? "BPI_END" : "BPI_START", result.Breakpoints.getLeft().Position, true);
                }
                if (result.Breakpoints.getRight() != null) {
                    v.getCommonInfo().putAttribute(swap ? "BPI_START" : "BPI_END", result.Breakpoints.getRight().Position, true);
                }
                // remove CIPOS / CIEND when we have an insert sequence
                if (!v.hasAttribute("IMPRECISE") && v.hasAttribute("SVINSSEQ")) {
                    v.getCommonInfo().removeAttribute("CIPOS");
                    v.getCommonInfo().removeAttribute("CIEND");
                }
                variants.add(v);
            };
            vcfUpdater.accept(variant, false);
            if (mateVariant != variant) {
                vcfUpdater.accept(mateVariant, true);
            }
        }
        // TODO: update START, END with BPI values and save Manta values in new attributes
        final String vcfOutputPath = cmd.getOptionValue(VCF_OUT);
        if (vcfOutputPath != null) {
            final VCFHeader header = vcfReader.getFileHeader();
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_START", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_END", 1, VCFHeaderLineType.Integer, "BPI adjusted breakend location"));
            header.addMetaDataLine(new VCFInfoHeaderLine("BPI_AMBIGUOUS", 0, VCFHeaderLineType.Flag, "BPI could not determine the breakpoints, inspect manually"));
            header.addMetaDataLine(new VCFHeaderLine("bpiVersion", BreakPointInspectorApplication.class.getPackage().getImplementationVersion()));
            Filter.UpdateVCFHeader(header);
            AlleleFrequency.UpdateVCFHeader(header);
            // setup VCF
            final VariantContextWriter writer = new VariantContextWriterBuilder().setReferenceDictionary(header.getSequenceDictionary()).setOutputFile(vcfOutputPath).build();
            writer.writeHeader(header);
            // write variants
            variants.sort(new VariantContextComparator(header.getSequenceDictionary()));
            variants.forEach(writer::add);
            writer.close();
        }
        final QueryInterval[] optimizedIntervals = QueryInterval.optimizeIntervals(combinedQueryIntervals.toArray(new QueryInterval[combinedQueryIntervals.size()]));
        if (tumorSlicePath != null) {
            writeToSlice(tumorSlicePath, tumorReader, optimizedIntervals);
        }
        if (refSlicePath != null) {
            writeToSlice(refSlicePath, refReader, optimizedIntervals);
        }
        refReader.close();
        tumorReader.close();
    } catch (ParseException e) {
        printHelpAndExit(options);
        System.exit(1);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) Options(org.apache.commons.cli.Options) HashMap(java.util.HashMap) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) HelpFormatter(org.apache.commons.cli.HelpFormatter) DefaultParser(org.apache.commons.cli.DefaultParser) Lists(com.google.common.collect.Lists) Arrays.asList(java.util.Arrays.asList) ObjectUtils(org.apache.commons.lang3.ObjectUtils) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) CommandLine(org.apache.commons.cli.CommandLine) Option(org.apache.commons.cli.Option) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) CommandLineParser(org.apache.commons.cli.CommandLineParser) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Set(java.util.Set) IOException(java.io.IOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) List(java.util.List) QueryInterval(htsjdk.samtools.QueryInterval) ParseException(org.apache.commons.cli.ParseException) StructuralVariantType(htsjdk.variant.variantcontext.StructuralVariantType) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) NotNull(org.jetbrains.annotations.NotNull) SamReaderFactory(htsjdk.samtools.SamReaderFactory) Options(org.apache.commons.cli.Options) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Set(java.util.Set) HashMap(java.util.HashMap) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) QueryInterval(htsjdk.samtools.QueryInterval) VariantContextComparator(htsjdk.variant.variantcontext.VariantContextComparator) SamReader(htsjdk.samtools.SamReader) Arrays.asList(java.util.Arrays.asList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) CommandLine(org.apache.commons.cli.CommandLine) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) ParseException(org.apache.commons.cli.ParseException) File(java.io.File)

Example 2 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project ASCIIGenome by dariober.

the class GenotypeMatrix method formatJsScriptWithInfo.

/**
 * Replace INFO tags in the jsScript with the actual values found in the variant context object
 */
@SuppressWarnings("unchecked")
private String formatJsScriptWithInfo(String jsScript, VariantContext ctx, VCFHeader vcfHeader) {
    Iterator<VCFInfoHeaderLine> iter = vcfHeader.getInfoHeaderLines().iterator();
    while (iter.hasNext()) {
        // We iterate through each key in the header and see if there is a match in JS script.
        VCFInfoHeaderLine headerLine = iter.next();
        String key = headerLine.getID();
        if (jsScript.contains('{' + key + '}') || jsScript.contains("{INFO/" + key + '}')) {
            Object unkValue = ctx.getAttributes().get(key);
            String fmtValue;
            try {
                List<Object> unknList = (List<Object>) unkValue;
                StringBuilder listParam = new StringBuilder();
                listParam.append("[");
                for (Object unk : unknList) {
                    listParam.append(this.formatObjectForJS(key, unk, vcfHeader.getInfoHeaderLine(key).getType()) + ", ");
                }
                fmtValue = listParam.append("]").toString();
            } catch (ClassCastException e) {
                fmtValue = this.formatObjectForJS(key, unkValue, vcfHeader.getInfoHeaderLine(key).getType());
            } catch (NullPointerException e) {
                if (headerLine.getType().equals(VCFHeaderLineType.Flag)) {
                    // A flag type returns null if the flag is missing, which is odd. Shouldn't it return false?
                    fmtValue = "false";
                } else {
                    fmtValue = "null";
                }
            }
            jsScript = jsScript.replace("{INFO/" + key + '}', fmtValue);
            jsScript = jsScript.replace('{' + key + '}', fmtValue);
        }
    }
    return jsScript;
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine)

Example 3 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class VcfBiomart method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
    HttpGet httpGet = null;
    final Pattern tab = Pattern.compile("[\t]");
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        final VCFHeader header = iter.getHeader();
        StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.rmAttribute(this.TAG);
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            // System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
            // escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
            httpGet = new HttpGet(builder.build());
            final CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
            int responseCode = httpResponse.getStatusLine().getStatusCode();
            if (responseCode != 200) {
                throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
            }
            InputStream response = httpResponse.getEntity().getContent();
            if (this.teeResponse) {
                response = new TeeInputStream(response, stderr(), false);
            }
            final BufferedReader br = new BufferedReader(new InputStreamReader(response));
            final Set<String> infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
                final StringBuilder sb = new StringBuilder();
                for (int i = 0; i < this.attributes.size(); i++) {
                    if (i > 0)
                        sb.append("|");
                    if (this.printLabels)
                        sb.append(escapeInfo(this.attributes.get(i))).append("|");
                    sb.append(i < T.length ? escapeInfo(T[i]) : "");
                }
                return sb.toString();
            }).collect(Collectors.toCollection(LinkedHashSet::new));
            CloserUtil.close(br);
            CloserUtil.close(response);
            CloserUtil.close(httpResponse);
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Transformer(javax.xml.transform.Transformer) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Attr(org.w3c.dom.Attr) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Document(org.w3c.dom.Document) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) URIBuilder(org.apache.http.client.utils.URIBuilder) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Element(org.w3c.dom.Element) HttpGet(org.apache.http.client.methods.HttpGet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HttpClients(org.apache.http.impl.client.HttpClients) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) StringWriter(java.io.StringWriter) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) TransformerFactory(javax.xml.transform.TransformerFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) StreamResult(javax.xml.transform.stream.StreamResult) InputStreamReader(java.io.InputStreamReader) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) InputStream(java.io.InputStream) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) URIBuilder(org.apache.http.client.utils.URIBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 4 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class VcfEnsemblVepRest method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
    try {
        final VCFHeader header = vcfIn.getHeader();
        final List<VariantContext> buffer = new ArrayList<>(this.batchSize + 1);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        for (final String tag : this.outputTags) {
            h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "__CUSTOM_DESCRIPTION__" + tag));
        }
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        for (; ; ) {
            VariantContext ctx = null;
            if (vcfIn.hasNext()) {
                buffer.add((ctx = progress.watch(vcfIn.next())));
            }
            if (ctx == null || buffer.size() >= this.batchSize) {
                if (!buffer.isEmpty()) {
                    final Map<String, EnsVepPrediction> input2pred = vep(buffer);
                    for (final VariantContext ctx2 : buffer) {
                        final String inputStr = createInputContext(ctx2);
                        final EnsVepPrediction pred = input2pred.get(inputStr);
                        final VariantContextBuilder vcb = new VariantContextBuilder(ctx2);
                        for (final String tag : this.outputTags) {
                            vcb.rmAttribute(tag);
                        }
                        if (pred == null) {
                            LOG.info("No Annotation found for " + inputStr);
                            out.add(vcb.make());
                            continue;
                        }
                        for (final String tag : this.outputTags) {
                            final Set<String> info = pred.tag2infoLines.get(tag);
                            if (info == null || info.isEmpty())
                                continue;
                            vcb.attribute(tag, new ArrayList<>(info));
                        }
                        out.add(vcb.make());
                    }
                // end of loop over variants
                }
                // end of if buffer is not empty
                if (ctx == null)
                    break;
                buffer.clear();
            }
        }
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) TransformerException(javax.xml.transform.TransformerException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 5 with VCFInfoHeaderLine

use of htsjdk.variant.vcf.VCFInfoHeaderLine in project jvarkit by lindenb.

the class VcfEnsemblReg method annotate.

private void annotate(Track track, File inf, File outf) throws IOException {
    boolean contained = false;
    LOG.info("Processing " + track.id + " (" + track.shortLabel + ") " + track.url);
    VcfIterator in = VCFUtils.createVcfIteratorFromFile(inf);
    VCFHeader header = in.getHeader();
    VCFInfoHeaderLine info = null;
    SeekableStream sstream = SeekableStreamFactory.getInstance().getStreamFor(track.url);
    BBFileReader bigFile = new BBFileReader(track.url.toString(), new SeekableStreamAdaptor(sstream));
    VariantContextWriter w1 = VCFUtils.createVariantContextWriter(outf);
    if (bigFile.isBigWigFile()) {
        info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.Float, String.valueOf(track.longLabel) + " " + track.url);
    } else {
        info = new VCFInfoHeaderLine(track.id, 1, VCFHeaderLineType.String, String.valueOf(track.longLabel) + " " + track.url);
    }
    header.addMetaDataLine(info);
    w1.writeHeader(in.getHeader());
    while (in.hasNext()) {
        VariantContext ctx = in.next();
        String chrom = ctx.getContig();
        if (!chrom.startsWith("chr"))
            chrom = "chr" + chrom;
        if (!chrom.matches("(chrX|chrY|chr[0-9]|chr1[0-9]|chr2[12])")) {
            w1.add(ctx);
        } else if (bigFile.isBigWigFile()) {
            BigWigIterator iter = bigFile.getBigWigIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
            Float wigValue = null;
            while (iter != null && iter.hasNext() && wigValue == null) {
                WigItem item = iter.next();
                wigValue = item.getWigValue();
            }
            if (wigValue == null) {
                w1.add(ctx);
                continue;
            }
            VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(track.id, wigValue);
            w1.add(vcb.make());
        } else {
            BigBedIterator iter = bigFile.getBigBedIterator(chrom, ctx.getStart() - 1, chrom, ctx.getStart(), contained);
            Set<String> bedValues = new HashSet<String>();
            while (iter != null && iter.hasNext()) {
                BedFeature item = iter.next();
                String[] rest = item.getRestOfFields();
                if (rest == null || rest.length != 6) {
                    System.err.println(track.id + " " + Arrays.toString(item.getRestOfFields()));
                    continue;
                }
                String color = null;
                if (track.parent != null) {
                    if (track.parent.startsWith("Segway_17SegmentationSummaries")) {
                        color = segway_17SegmentationSummaries(rest[5]);
                    } else if (track.parent.startsWith("ProjectedSegments")) {
                        color = projectedSegments(rest[5]);
                    } else if (track.parent.startsWith("RegBuildOverview")) {
                        color = regBuildOverview(rest[5]);
                    } else if (track.parent.startsWith("Segway_17CellSegments")) {
                        color = segway_17CellSegments(rest[5]);
                    } else {
                        System.err.println("Unknown parent:" + track.parent);
                    }
                }
                if (color == null)
                    continue;
                bedValues.add(rest[0] + "|" + color);
            }
            if (bedValues.isEmpty()) {
                w1.add(ctx);
                continue;
            }
            StringBuilder sb = new StringBuilder();
            for (String s : bedValues) {
                if (sb.length() != 0)
                    sb.append(",");
                sb.append(s);
            }
            VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.attribute(track.id, sb.toString());
            w1.add(vcb.make());
        }
    }
    sstream.close();
    in.close();
    w1.close();
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) SeekableStream(htsjdk.samtools.seekablestream.SeekableStream) VariantContext(htsjdk.variant.variantcontext.VariantContext) BigBedIterator(org.broad.igv.bbfile.BigBedIterator) BedFeature(org.broad.igv.bbfile.BedFeature) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) WigItem(org.broad.igv.bbfile.WigItem) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SeekableStreamAdaptor(com.github.lindenb.jvarkit.util.igv.SeekableStreamAdaptor) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BBFileReader(org.broad.igv.bbfile.BBFileReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) BigWigIterator(org.broad.igv.bbfile.BigWigIterator)

Aggregations

VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)55 VCFHeader (htsjdk.variant.vcf.VCFHeader)49 VariantContext (htsjdk.variant.variantcontext.VariantContext)37 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)37 ArrayList (java.util.ArrayList)34 HashSet (java.util.HashSet)32 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)31 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)25 Allele (htsjdk.variant.variantcontext.Allele)22 IOException (java.io.IOException)20 File (java.io.File)19 Genotype (htsjdk.variant.variantcontext.Genotype)17 GenotypeBuilder (htsjdk.variant.variantcontext.GenotypeBuilder)17 Set (java.util.Set)17 HashMap (java.util.HashMap)16 List (java.util.List)16 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 VCFFilterHeaderLine (htsjdk.variant.vcf.VCFFilterHeaderLine)14 VCFFormatHeaderLine (htsjdk.variant.vcf.VCFFormatHeaderLine)14