Search in sources :

Example 6 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class KnimeVariantHelper method processVcfMulti.

/**
 * process the VCF file,
 *
 * @param vcfIn input file name
 * @param fun functional
 * @return the output file name
 * @throws IOException
 */
public String processVcfMulti(final String vcfIn, final Function<VariantContext, List<VariantContext>> fun) throws IOException {
    this.lastVariantCount = 0;
    if (vcfIn == null) {
        final String msg = "Vcf Input URI/FIle is null.";
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
    }
    File outVcfFile = null;
    File outVcfIndexFile = null;
    final File STOP_FILE = new File(this.workfingDirectory, "STOP");
    if (STOP_FILE.exists()) {
        final String msg = "There is a stop file in " + STOP_FILE;
        LOG.error(msg);
        throw new IOException(msg);
    }
    boolean fail_flag = false;
    VcfIterator iter = null;
    VariantContextWriter variantContextWriter = null;
    try {
        IOUtil.assertDirectoryIsReadable(this.workfingDirectory);
        IOUtil.assertDirectoryIsWritable(this.workfingDirectory);
        if (!IOUtil.isUrl(vcfIn)) {
            IOUtil.assertFileIsReadable(new File(vcfIn));
        }
        final String extension;
        if (this.forceSuffix.equals(ForceSuffix.ForceTabix)) {
            extension = ".vcf.gz";
        } else if (this.forceSuffix.equals(ForceSuffix.ForceTribble)) {
            extension = ".vcf";
        } else if (vcfIn.endsWith(".gz")) {
            extension = ".vcf.gz";
        } else {
            extension = ".vcf";
        }
        final String filename = this.createOutputFile(vcfIn, extension);
        final String indexFilename;
        if (extension.endsWith(".gz")) {
            indexFilename = filename + Tribble.STANDARD_INDEX_EXTENSION;
        } else {
            indexFilename = filename + TabixUtils.STANDARD_INDEX_EXTENSION;
        }
        outVcfFile = new File(filename);
        outVcfIndexFile = new File(indexFilename);
        LOG.info("opening " + vcfIn);
        iter = VCFUtils.createVcfIterator(vcfIn);
        super.init(iter.getHeader());
        final VCFHeader vcfHeader2;
        if (this.getExtraVcfHeaderLines().isEmpty()) {
            vcfHeader2 = iter.getHeader();
        } else {
            vcfHeader2 = new VCFHeader(iter.getHeader());
            for (final VCFHeaderLine extra : this.getExtraVcfHeaderLines()) {
                vcfHeader2.addMetaDataLine(extra);
            }
            // clear vcf header line now they 've been added to the header.
            this.getExtraVcfHeaderLines().clear();
        }
        final SAMSequenceDictionary dict = this.getHeader().getSequenceDictionary();
        if (dict == null) {
            final String msg = "There is no dictionary (##contig lines) in " + vcfIn + " but they are required.";
            LOG.error(msg);
            throw new IllegalArgumentException(msg);
        }
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        progress.setLogPrefix(this.filePrefix);
        LOG.info("writing " + outVcfFile + ". Emergency stop file is " + STOP_FILE);
        variantContextWriter = this.variantContextWriterBuilder.setOutputFile(outVcfFile).setReferenceDictionary(dict).build();
        long lastTick = System.currentTimeMillis();
        variantContextWriter.writeHeader(vcfHeader2);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final List<VariantContext> array = fun.apply(ctx);
            if (array != null) {
                for (final VariantContext ctx2 : array) {
                    variantContextWriter.add(ctx2);
                    this.lastVariantCount++;
                }
            }
            // check STOP File
            final long now = System.currentTimeMillis();
            if (// 10sec
            (now - lastTick) > 10 * 1000) {
                lastTick = now;
                if (STOP_FILE.exists()) {
                    LOG.warn("STOP FILE detected " + STOP_FILE + " Aborting.");
                    fail_flag = true;
                    break;
                }
            }
        }
        progress.finish();
        iter.close();
        iter = null;
        variantContextWriter.close();
        variantContextWriter = null;
        return outVcfFile.getPath();
    } catch (final Exception err) {
        fail_flag = true;
        LOG.error(err);
        throw new IOException(err);
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(variantContextWriter);
        if (fail_flag) {
            if (outVcfFile != null && outVcfFile.exists()) {
                LOG.warn("deleting " + outVcfFile);
                outVcfFile.delete();
            }
            if (outVcfIndexFile != null && outVcfIndexFile.exists()) {
                LOG.warn("deleting " + outVcfIndexFile);
                outVcfIndexFile.delete();
            }
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) File(java.io.File)

Example 7 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class CompareBamAndBuild method doWork.

@Override
public int doWork(final List<String> args) {
    PrintWriter out = null;
    SortingCollection<Match> database = null;
    if (this.chainFile == null) {
        LOG.error("Chain file is not defined Option");
        return -1;
    }
    if (args.size() != 2) {
        LOG.error("Illegal number of arguments. Expected two indexed BAMS.");
        return -1;
    }
    try {
        LOG.info("load chain file");
        this.liftOver = new LiftOver(this.chainFile);
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrdererInSortingCollection(), this.maxRecordsInRam, this.tmpDir.toPath());
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < 2; currentSamFileIndex++) {
            final File samFile = new File(args.get(currentSamFileIndex));
            LOG.info("read " + samFile);
            this.bamFiles[currentSamFileIndex] = samFile;
            SamReader samFileReader = CompareBamAndBuild.this.openSamReader(samFile.getPath());
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            this.sequenceDictionaries[currentSamFileIndex] = dict;
            if (dict.isEmpty()) {
                samFileReader.close();
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            final Interval interval;
            if (REGION != null) {
                IntervalParser intervalParser = new IntervalParser(dict);
                interval = intervalParser.parse(this.REGION);
                if (interval == null) {
                    samFileReader.close();
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary");
                    return -1;
                }
            } else {
                interval = null;
            }
            final SAMRecordIterator iter;
            if (interval == null) {
                iter = samFileReader.iterator();
            } else {
                iter = samFileReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (iter.hasNext()) {
                final SAMRecord rec = progress.watch(iter.next());
                if (rec.isSecondaryOrSupplementary())
                    continue;
                final Match m = new Match();
                m.flag = rec.getFlags();
                m.readName = rec.getReadName();
                m.firstBamFile = currentSamFileIndex == 0;
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            iter.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        out.print("#READ-Name\tCOMPARE");
        for (final File f : this.bamFiles) {
            out.print("\t" + f);
        }
        out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBamAndBuild.Match>>(2);
        while (matches.size() < 2) {
            matches.add(new TreeSet<CompareBamAndBuild.Match>(match_comparator));
        }
        CloseableIterator<Match> iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            final Match nextMatch;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            } else {
                nextMatch = null;
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.indexInPair())) {
                if (currReadName != null) {
                    out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        out.print("/");
                        out.print(curr_num_in_pair);
                    }
                    out.print("\t");
                    if (same(matches.get(0), matches.get(1))) {
                        out.print("EQ");
                    } else {
                        out.print("NE");
                    }
                    for (int x = 0; x < 2; ++x) {
                        out.print("\t");
                        print(out, matches.get(x));
                    }
                    out.println();
                }
                if (nextMatch == null)
                    break;
                for (final Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.indexInPair();
            matches.get(nextMatch.firstBamFile ? 0 : 1).add(nextMatch);
        }
        iter.close();
        out.flush();
        out.close();
        database.cleanup();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
        this.liftOver = null;
    }
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) PrintWriter(java.io.PrintWriter) Interval(htsjdk.samtools.util.Interval)

Example 8 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class CompareBams method doWork.

@Override
public int doWork(final List<String> args) {
    this.IN.addAll(args.stream().map(S -> new File(S)).collect(Collectors.toList()));
    SortingCollection<Match> database = null;
    SamReader samFileReader = null;
    CloseableIterator<Match> iter = null;
    try {
        if (this.IN.size() < 2) {
            LOG.error("Need more bams please");
            return -1;
        }
        database = SortingCollection.newInstance(Match.class, new MatchCodec(), new MatchOrderer(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        this.samSequenceDictAreTheSame = true;
        database.setDestructiveIteration(true);
        for (int currentSamFileIndex = 0; currentSamFileIndex < this.IN.size(); currentSamFileIndex++) {
            final File samFile = this.IN.get(currentSamFileIndex);
            LOG.info("Opening " + samFile);
            samFileReader = super.createSamReaderFactory().open(samFile);
            final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
            if (dict == null || dict.isEmpty()) {
                LOG.error("Empty Dict  in " + samFile);
                return -1;
            }
            if (!this.sequenceDictionaries.isEmpty() && !SequenceUtil.areSequenceDictionariesEqual(this.sequenceDictionaries.get(0), dict)) {
                this.samSequenceDictAreTheSame = false;
                LOG.warn("FOOL !! THE SEQUENCE DICTIONARIES ARE **NOT** THE SAME. I will try to compare anyway but it will be slower.");
            }
            this.sequenceDictionaries.add(dict);
            final Optional<Interval> interval;
            if (REGION != null && !REGION.trim().isEmpty()) {
                final IntervalParser dix = new IntervalParser(dict);
                interval = Optional.ofNullable(dix.parse(REGION));
                if (!interval.isPresent()) {
                    LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary)");
                    return -1;
                }
            } else {
                interval = Optional.empty();
            }
            SAMRecordIterator it = null;
            if (!interval.isPresent()) {
                it = samFileReader.iterator();
            } else {
                it = samFileReader.queryOverlapping(interval.get().getContig(), interval.get().getStart(), interval.get().getEnd());
            }
            final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
            while (it.hasNext()) {
                final SAMRecord rec = progress.watch(it.next());
                if (!rec.getReadUnmappedFlag()) {
                    if (rec.getMappingQuality() < this.min_mapq)
                        continue;
                    if (rec.isSecondaryOrSupplementary())
                        continue;
                }
                final Match m = new Match();
                if (rec.getReadPairedFlag()) {
                    m.num_in_pair = (rec.getFirstOfPairFlag() ? 1 : 2);
                } else {
                    m.num_in_pair = 0;
                }
                m.readName = rec.getReadName();
                m.bamIndex = currentSamFileIndex;
                m.flag = rec.getFlags();
                m.cigar = rec.getCigarString();
                if (m.cigar == null)
                    m.cigar = "";
                if (rec.getReadUnmappedFlag()) {
                    m.tid = -1;
                    m.pos = -1;
                } else {
                    m.tid = rec.getReferenceIndex();
                    m.pos = rec.getAlignmentStart();
                }
                database.add(m);
            }
            it.close();
            samFileReader.close();
            samFileReader = null;
            LOG.info("Close " + samFile);
        }
        database.doneAdding();
        LOG.info("Writing results....");
        this.out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // compute the differences for each read
        this.out.print("#READ-Name\t");
        for (int x = 0; x < this.IN.size(); ++x) {
            for (int y = x + 1; y < this.IN.size(); ++y) {
                if (!(x == 0 && y == 1))
                    this.out.print("|");
                this.out.print(IN.get(x));
                this.out.print(" ");
                this.out.print(IN.get(y));
            }
        }
        for (int x = 0; x < this.IN.size(); ++x) {
            this.out.print("\t" + IN.get(x));
        }
        this.out.println();
        /* create an array of set<Match> */
        final MatchComparator match_comparator = new MatchComparator();
        final List<Set<Match>> matches = new ArrayList<Set<CompareBams.Match>>(this.IN.size());
        while (matches.size() < this.IN.size()) {
            matches.add(new TreeSet<CompareBams.Match>(match_comparator));
        }
        iter = database.iterator();
        String currReadName = null;
        int curr_num_in_pair = -1;
        for (; ; ) {
            Match nextMatch = null;
            if (iter.hasNext()) {
                nextMatch = iter.next();
            }
            if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.num_in_pair)) {
                if (currReadName != null) {
                    this.out.print(currReadName);
                    if (curr_num_in_pair > 0) {
                        this.out.print("/");
                        this.out.print(curr_num_in_pair);
                    }
                    this.out.print("\t");
                    for (int x = 0; x < this.IN.size(); ++x) {
                        final Set<Match> first = matches.get(x);
                        for (int y = x + 1; y < this.IN.size(); ++y) {
                            if (!(x == 0 && y == 1))
                                this.out.print("|");
                            Set<Match> second = matches.get(y);
                            if (same(first, second)) {
                                this.out.print("EQ");
                            } else {
                                this.out.print("NE");
                            }
                        }
                    }
                    for (int x = 0; x < this.IN.size(); ++x) {
                        this.out.print("\t");
                        print(matches.get(x), sequenceDictionaries.get(x));
                    }
                    this.out.println();
                }
                if (nextMatch == null)
                    break;
                for (Set<Match> set : matches) set.clear();
            }
            currReadName = nextMatch.readName;
            curr_num_in_pair = nextMatch.num_in_pair;
            matches.get(nextMatch.bamIndex).add(nextMatch);
            if (this.out.checkError())
                break;
        }
        iter.close();
        this.out.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (database != null)
            database.cleanup();
        CloserUtil.close(samFileReader);
        CloserUtil.close(this.out);
        this.out = null;
    }
}
Also used : IntervalParser(com.github.lindenb.jvarkit.util.bio.IntervalParser) TreeSet(java.util.TreeSet) Set(java.util.Set) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) Interval(htsjdk.samtools.util.Interval)

Example 9 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfBiomart method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator iter, final VariantContextWriter out) {
    HttpGet httpGet = null;
    final Pattern tab = Pattern.compile("[\t]");
    try {
        final TransformerFactory factory = TransformerFactory.newInstance();
        final Transformer transformer = factory.newTransformer();
        // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        final VCFHeader header = iter.getHeader();
        StringBuilder desc = new StringBuilder("Biomart query. Format: ");
        desc.append(this.attributes.stream().map(S -> this.printLabels ? S + "|" + S : S).collect(Collectors.joining("|")));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFInfoHeaderLine(this.TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, desc.toString()));
        out.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        while (iter.hasNext()) {
            final VariantContext ctx = progress.watch(iter.next());
            final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
            vcb.rmAttribute(this.TAG);
            this.filterColumnContig.set(ctx.getContig());
            this.filterColumnStart.set(String.valueOf(ctx.getStart()));
            this.filterColumnEnd.set(String.valueOf(ctx.getEnd()));
            final StringWriter domToStr = new StringWriter();
            transformer.transform(new DOMSource(this.domQuery), new StreamResult(domToStr));
            final URIBuilder builder = new URIBuilder(this.serviceUrl);
            builder.addParameter("query", domToStr.toString());
            // System.err.println("\nwget -O - 'http://grch37.ensembl.org/biomart/martservice?query="+escapedQuery+"'\n");
            // escapedQuery = URLEncoder.encode(escapedQuery,"UTF-8");
            httpGet = new HttpGet(builder.build());
            final CloseableHttpResponse httpResponse = httpClient.execute(httpGet);
            int responseCode = httpResponse.getStatusLine().getStatusCode();
            if (responseCode != 200) {
                throw new RuntimeIOException("Response code was not 200. Detected response was " + responseCode);
            }
            InputStream response = httpResponse.getEntity().getContent();
            if (this.teeResponse) {
                response = new TeeInputStream(response, stderr(), false);
            }
            final BufferedReader br = new BufferedReader(new InputStreamReader(response));
            final Set<String> infoAtts = br.lines().filter(L -> !StringUtil.isBlank(L)).filter(L -> !L.equals("[success]")).map(L -> tab.split(L)).map(T -> {
                final StringBuilder sb = new StringBuilder();
                for (int i = 0; i < this.attributes.size(); i++) {
                    if (i > 0)
                        sb.append("|");
                    if (this.printLabels)
                        sb.append(escapeInfo(this.attributes.get(i))).append("|");
                    sb.append(i < T.length ? escapeInfo(T[i]) : "");
                }
                return sb.toString();
            }).collect(Collectors.toCollection(LinkedHashSet::new));
            CloserUtil.close(br);
            CloserUtil.close(response);
            CloserUtil.close(httpResponse);
            if (!infoAtts.isEmpty()) {
                vcb.attribute(this.TAG, new ArrayList<>(infoAtts));
            }
            out.add(vcb.make());
        }
        progress.finish();
        return 0;
    } catch (final Exception err) {
        LOG.error(err);
        throw new RuntimeIOException(err);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) Transformer(javax.xml.transform.Transformer) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Attr(org.w3c.dom.Attr) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) Document(org.w3c.dom.Document) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) Node(org.w3c.dom.Node) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedHashSet(java.util.LinkedHashSet) CloserUtil(htsjdk.samtools.util.CloserUtil) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) URIBuilder(org.apache.http.client.utils.URIBuilder) StringWriter(java.io.StringWriter) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) Set(java.util.Set) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) Element(org.w3c.dom.Element) HttpGet(org.apache.http.client.methods.HttpGet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) DocumentBuilder(javax.xml.parsers.DocumentBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) BufferedReader(java.io.BufferedReader) Pattern(java.util.regex.Pattern) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) TransformerFactory(javax.xml.transform.TransformerFactory) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) HttpClients(org.apache.http.impl.client.HttpClients) InputStream(java.io.InputStream) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) Transformer(javax.xml.transform.Transformer) HttpGet(org.apache.http.client.methods.HttpGet) VariantContext(htsjdk.variant.variantcontext.VariantContext) StringWriter(java.io.StringWriter) CloseableHttpResponse(org.apache.http.client.methods.CloseableHttpResponse) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) TransformerFactory(javax.xml.transform.TransformerFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) StreamResult(javax.xml.transform.stream.StreamResult) InputStreamReader(java.io.InputStreamReader) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) InputStream(java.io.InputStream) TeeInputStream(com.github.lindenb.jvarkit.io.TeeInputStream) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) URIBuilder(org.apache.http.client.utils.URIBuilder) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader)

Example 10 with SAMSequenceDictionaryProgress

use of com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress in project jvarkit by lindenb.

the class VcfEnsemblVepRest method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
    try {
        final VCFHeader header = vcfIn.getHeader();
        final List<VariantContext> buffer = new ArrayList<>(this.batchSize + 1);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        for (final String tag : this.outputTags) {
            h2.addMetaDataLine(new VCFInfoHeaderLine(tag, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "__CUSTOM_DESCRIPTION__" + tag));
        }
        out.writeHeader(h2);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        for (; ; ) {
            VariantContext ctx = null;
            if (vcfIn.hasNext()) {
                buffer.add((ctx = progress.watch(vcfIn.next())));
            }
            if (ctx == null || buffer.size() >= this.batchSize) {
                if (!buffer.isEmpty()) {
                    final Map<String, EnsVepPrediction> input2pred = vep(buffer);
                    for (final VariantContext ctx2 : buffer) {
                        final String inputStr = createInputContext(ctx2);
                        final EnsVepPrediction pred = input2pred.get(inputStr);
                        final VariantContextBuilder vcb = new VariantContextBuilder(ctx2);
                        for (final String tag : this.outputTags) {
                            vcb.rmAttribute(tag);
                        }
                        if (pred == null) {
                            LOG.info("No Annotation found for " + inputStr);
                            out.add(vcb.make());
                            continue;
                        }
                        for (final String tag : this.outputTags) {
                            final Set<String> info = pred.tag2infoLines.get(tag);
                            if (info == null || info.isEmpty())
                                continue;
                            vcb.attribute(tag, new ArrayList<>(info));
                        }
                        out.add(vcb.make());
                    }
                // end of loop over variants
                }
                // end of if buffer is not empty
                if (ctx == null)
                    break;
                buffer.clear();
            }
        }
        progress.finish();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) TransformerException(javax.xml.transform.TransformerException) RuntimeIOException(htsjdk.samtools.util.RuntimeIOException) IOException(java.io.IOException) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Aggregations

SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)146 ArrayList (java.util.ArrayList)64 VariantContext (htsjdk.variant.variantcontext.VariantContext)59 VCFHeader (htsjdk.variant.vcf.VCFHeader)57 SAMRecord (htsjdk.samtools.SAMRecord)54 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)54 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)48 IOException (java.io.IOException)48 File (java.io.File)47 SamReader (htsjdk.samtools.SamReader)40 SAMFileHeader (htsjdk.samtools.SAMFileHeader)38 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)37 HashSet (java.util.HashSet)34 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)32 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)30 List (java.util.List)30 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)29 HashMap (java.util.HashMap)28 Parameter (com.beust.jcommander.Parameter)27 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)27