Search in sources :

Example 1 with SnpData

use of edu.washington.gs.evs.SnpData in project jvarkit by lindenb.

the class EvsDumpXml method fetchEvsData.

/**
 * Queries the EVS SOAP service for one genomic interval and stores every
 * {@code snpList} element found in the response.
 *
 * <p>Each record is unmarshalled to a {@link SnpData}, re-serialized as a
 * stand-alone {@code snpList} XML fragment, and then either added to the
 * sorting collection (when one is configured) or written directly to the
 * output stream followed by a newline. {@code count_records} is incremented
 * for each record and reading stops once {@code LIMIT} (when positive) is
 * reached.
 *
 * <p>Failures are reported but not propagated: the method is best-effort and
 * returns normally even when the request fails.
 *
 * @param chrom chromosome name sent to the service
 * @param start interval start sent to the service
 * @param end interval end sent to the service
 */
private void fetchEvsData(String chrom, int start, int end) {
    SnpDataBinding dataBinding = new SnpDataBinding();
    double ratio = 100.0 * (this.genome_curr_size + start) / (double) this.genome_total_size;
    LOG.info(chrom + ":" + start + "-" + end + " N=" + count_records + " " + (int) ratio + "%");
    PrintStream wr = null;
    InputStream rd = null;
    XMLEventReader xmlr = null;
    try {
        URL url = new URL("http://gvs-1.gs.washington.edu/wsEVS/EVSDataQueryService");
        URLConnection conn = null;
        for (int n_try = 0; n_try < MAX_TRY; ++n_try) {
            try {
                conn = url.openConnection();
                // Success: stop retrying instead of opening further connections.
                break;
            } catch (java.net.ConnectException err) {
                if (n_try + 1 == MAX_TRY)
                    throw err;
                LOG.warning("Error: trying " + (n_try) + "/" + MAX_TRY + " " + url);
            }
        }
        if (conn == null) {
            // Only reachable when MAX_TRY <= 0; fail with a clear message
            // rather than a NullPointerException.
            throw new IllegalStateException("No connection attempt was made (MAX_TRY=" + MAX_TRY + ") for " + url);
        }
        // Send the SOAP request.
        conn.setDoOutput(true);
        wr = new PrintStream(conn.getOutputStream());
        wr.print("<?xml version='1.0' ?>" + "<S:Envelope xmlns:S='http://schemas.xmlsoap.org/soap/envelope/'>" + "<S:Body>" + "<ns2:getEvsData xmlns:ns2='http://webservice.evs.gs.washington.edu/'>" + "<arg0>");
        wr.print(chrom);
        wr.print(":");
        wr.print(String.valueOf(start));
        wr.print("-");
        wr.print(String.valueOf(end));
        wr.print("</arg0>" + "</ns2:getEvsData>" + "</S:Body>" + "</S:Envelope>");
        wr.flush();
        // Read the response and pick out every <snpList> element.
        rd = conn.getInputStream();
        xmlr = this.xmlInputFactory.createXMLEventReader(rd);
        while (xmlr.hasNext()) {
            // peek() so that the unmarshaller below sees the start element itself.
            XMLEvent evt = xmlr.peek();
            if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                xmlr.nextEvent();
                continue;
            }
            SnpData snpData = dataBinding.unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
            StringWriter sw = new StringWriter();
            dataBinding.marshaller.marshal(new JAXBElement<SnpData>(new QName("snpList"), SnpData.class, snpData), sw);
            if (this.sortingCollection != null) {
                this.sortingCollection.add(sw.toString());
            } else {
                // Explicit UTF-8 instead of the platform default charset.
                this.outputstream.write(sw.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
                this.outputstream.write('\n');
            }
            ++count_records;
            if (LIMIT > 0 && count_records >= LIMIT)
                break;
        }
    } catch (Exception err) {
        LOG.warning("Cannot fetch " + chrom + ":" + start + "-" + end + " : " + err);
        err.printStackTrace();
    } finally {
        // Release everything even when the request or the parsing failed.
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (Exception closeErr) {
                LOG.warning("Cannot close XML reader: " + closeErr);
            }
        }
        if (wr != null) {
            wr.close();
        }
        if (rd != null) {
            try {
                rd.close();
            } catch (Exception closeErr) {
                LOG.warning("Cannot close response stream: " + closeErr);
            }
        }
    }
}
Also used : PrintStream(java.io.PrintStream) DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) QName(javax.xml.namespace.QName) URL(java.net.URL) URLConnection(java.net.URLConnection) IOException(java.io.IOException) StringWriter(java.io.StringWriter) XMLEvent(javax.xml.stream.events.XMLEvent) XMLEventReader(javax.xml.stream.XMLEventReader) SnpData(edu.washington.gs.evs.SnpData)

Example 2 with SnpData

use of edu.washington.gs.evs.SnpData in project jvarkit by lindenb.

the class EvsDumpXml method doWork.

/**
 * Downloads the EVS data chromosome by chromosome and writes it as one XML
 * document rooted at {@code evsData}.
 *
 * <p>Steps:
 * <ol>
 *   <li>initialise the XML factory and transformer, and the sorting collection
 *       when {@code doSort} is set;</li>
 *   <li>build one {@code Fetcher} per chromosome (1-22 and X) and sum their
 *       lengths into {@code genome_total_size};</li>
 *   <li>open the output (a file when {@code outfilename} is set, otherwise
 *       standard output) and write the XML header;</li>
 *   <li>run every fetcher;</li>
 *   <li>when sorting, write the sorted records, skipping a record identical to
 *       the previous one, and feed the index creator when writing to a file;</li>
 *   <li>write the footer, close the output and write the index if any.</li>
 * </ol>
 *
 * @return {@code 0} on success, {@code -1} when any exception occurred
 */
private int doWork() {
    // Tracks whether this.outputstream still has to be closed by the finally block.
    boolean outputOpen = false;
    try {
        this.xmlInputFactory = XMLInputFactory.newFactory();
        TransformerFactory factory = TransformerFactory.newInstance();
        this.transformer = factory.newTransformer();
        this.transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        if (this.doSort) {
            this.sortingCollection = SortingCollection.newInstance(String.class, new SnpStringCodec(), new SnpDataComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            this.sortingCollection.setDestructiveIteration(true);
        }
        final List<Fetcher> fetchers = new ArrayList<Fetcher>(24);
        fetchers.add(fetch("1", 249250621));
        fetchers.add(fetch("2", 243199373));
        fetchers.add(fetch("3", 198022430));
        fetchers.add(fetch("4", 191154276));
        fetchers.add(fetch("5", 180915260));
        fetchers.add(fetch("6", 171115067));
        fetchers.add(fetch("7", 159138663));
        fetchers.add(fetch("8", 146364022));
        fetchers.add(fetch("9", 141213431));
        fetchers.add(fetch("10", 135534747));
        fetchers.add(fetch("11", 135006516));
        fetchers.add(fetch("12", 133851895));
        fetchers.add(fetch("13", 115169878));
        fetchers.add(fetch("14", 107349540));
        fetchers.add(fetch("15", 102531392));
        fetchers.add(fetch("16", 90354753));
        fetchers.add(fetch("17", 81195210));
        fetchers.add(fetch("18", 78077248));
        fetchers.add(fetch("19", 59128983));
        fetchers.add(fetch("20", 63025520));
        fetchers.add(fetch("21", 48129895));
        fetchers.add(fetch("22", 51304566));
        fetchers.add(fetch("X", 155270560));
        // fetch("Y",59373566); not in evs
        // fetch("M",16571);
        this.genome_total_size = 0L;
        this.genome_curr_size = 0L;
        for (Fetcher fetcher : fetchers) {
            this.genome_total_size += fetcher.length;
        }
        DynamicIndexCreator indexer = null;
        if (this.outfilename != null) {
            LOG.info("Opening " + this.outfilename);
            this.outputstream = new LocationAwareOutputStream(new FileOutputStream(this.outfilename));
            indexer = new DynamicIndexCreator(this.outfilename, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
        } else {
            this.outputstream = new LocationAwareOutputStream(System.out);
        }
        outputOpen = true;
        // print header; bytes are encoded as UTF-8 to match the declared encoding
        final String xml_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<evsData xmlns=\"http://webservice.evs.gs.washington.edu/\">\n";
        this.outputstream.write(xml_header.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        for (Fetcher fetcher : fetchers) {
            fetcher.run();
            // progress offset used when reporting the percentage done
            this.genome_curr_size += fetcher.length;
        }
        if (this.sortingCollection != null) {
            SnpDataBinding snpDataBinding = new SnpDataBinding();
            this.sortingCollection.doneAdding();
            String prev = null;
            CloseableIterator<String> iter = sortingCollection.iterator();
            try {
                while (iter.hasNext()) {
                    String s = iter.next();
                    // sorted input: identical records are adjacent, keep only the first
                    if (prev != null && prev.equals(s)) {
                        continue;
                    }
                    // offset of the record, captured before it is written, for the index
                    long position = outputstream.getPosition();
                    outputstream.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                    // important SnpDataCodec needs separate lines
                    outputstream.write('\n');
                    if (indexer != null) {
                        SnpData sd = snpDataBinding.convert(s);
                        indexer.addFeature(new SnpDataFeature(sd), position);
                    }
                    prev = s;
                }
            } finally {
                // close the iterator even when writing or indexing failed
                iter.close();
            }
        }
        long last_index = this.outputstream.getPosition();
        final String xml_footer = "</evsData>\n";
        this.outputstream.write(xml_footer.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        this.outputstream.flush();
        // mark as closed first so that the finally block never closes twice
        outputOpen = false;
        this.outputstream.close();
        if (indexer != null) {
            LOG.info("Writing index");
            final Index index = indexer.finalizeIndex(last_index);
            index.writeBasedOnFeatureFile(this.outfilename);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (outputOpen) {
            // an error interrupted the normal flow: release the output anyway
            try {
                this.outputstream.close();
            } catch (Exception closeErr) {
                closeErr.printStackTrace();
            }
        }
        if (this.sortingCollection != null)
            this.sortingCollection.cleanup();
    }
    return 0;
}
Also used : DynamicIndexCreator(htsjdk.tribble.index.DynamicIndexCreator) LocationAwareOutputStream(com.github.lindenb.jvarkit.io.LocationAwareOutputStream) TransformerFactory(javax.xml.transform.TransformerFactory) ArrayList(java.util.ArrayList) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) SnpData(edu.washington.gs.evs.SnpData)

Example 3 with SnpData

use of edu.washington.gs.evs.SnpData in project jvarkit by lindenb.

the class EvsToVcf method doWork.

/**
 * Converts an EVS XML dump read from standard input into VCF written to
 * standard output.
 *
 * <p>Every {@code snpList} element of the input is unmarshalled to a
 * {@link SnpData} and turned into one variant carrying the INFO attributes
 * {@code CONS}, {@code GERP}, {@code uaMAF}, {@code aaMAF}, {@code totalMAF}
 * and {@code DP}. Reading stops early when standard output reports an error.
 *
 * @param args command-line arguments; must be empty
 * @return {@code 0} on success, {@code -1} on bad arguments or any error
 */
@Override
public int doWork(List<String> args) {
    VariantContextWriter out = null;
    XMLEventReader xmlr = null;
    try {
        if (!args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        JAXBContext jc = JAXBContext.newInstance(SnpData.class);
        Unmarshaller unmarshaller = jc.createUnmarshaller();
        out = VCFUtils.createVariantContextWriterToStdout();
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        _fillDict(dict, "1", 249250621);
        _fillDict(dict, "2", 243199373);
        _fillDict(dict, "3", 198022430);
        _fillDict(dict, "4", 191154276);
        _fillDict(dict, "5", 180915260);
        _fillDict(dict, "6", 171115067);
        _fillDict(dict, "7", 159138663);
        _fillDict(dict, "8", 146364022);
        _fillDict(dict, "9", 141213431);
        _fillDict(dict, "10", 135534747);
        _fillDict(dict, "11", 135006516);
        _fillDict(dict, "12", 133851895);
        _fillDict(dict, "13", 115169878);
        _fillDict(dict, "14", 107349540);
        _fillDict(dict, "15", 102531392);
        _fillDict(dict, "16", 90354753);
        _fillDict(dict, "17", 81195210);
        _fillDict(dict, "18", 78077248);
        _fillDict(dict, "19", 59128983);
        _fillDict(dict, "20", 63025520);
        _fillDict(dict, "21", 48129895);
        _fillDict(dict, "22", 51304566);
        _fillDict(dict, "X", 155270560);
        _fillDict(dict, "Y", 59373566);
        _fillDict(dict, "MT", 16569);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        // One value per record: use the fixed-count constructor (Number=1).
        // Each description names the SnpData property the value comes from.
        header.addMetaDataLine(new VCFInfoHeaderLine("CONS", 1, VCFHeaderLineType.Float, "conservationScore"));
        header.addMetaDataLine(new VCFInfoHeaderLine("GERP", 1, VCFHeaderLineType.Float, "conservationScoreGERP"));
        header.addMetaDataLine(new VCFInfoHeaderLine("uaMAF", 1, VCFHeaderLineType.Float, "uaMAF"));
        header.addMetaDataLine(new VCFInfoHeaderLine("aaMAF", 1, VCFHeaderLineType.Float, "aaMAF"));
        header.addMetaDataLine(new VCFInfoHeaderLine("totalMAF", 1, VCFHeaderLineType.Float, "totalMAF"));
        header.addMetaDataLine(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "avgSampleReadDepth"));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        out.writeHeader(header);
        Pattern comma = Pattern.compile("[,]");
        XMLInputFactory xif = XMLInputFactory.newFactory();
        xif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
        // The document comes from stdin: never resolve external entities.
        xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        xmlr = xif.createXMLEventReader(System.in);
        while (xmlr.hasNext() && !System.out.checkError()) {
            // peek() so that the unmarshaller below sees the start element itself.
            XMLEvent evt = xmlr.peek();
            if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                xmlr.nextEvent();
                continue;
            }
            SnpData snpData = unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
            VariantContextBuilder vcb = new VariantContextBuilder();
            // Insertion-ordered set: REF first, then ALT alleles in source order.
            Set<Allele> alleles = new java.util.LinkedHashSet<Allele>();
            alleles.add(Allele.create(snpData.getRefAllele(), true));
            for (String s : comma.split(snpData.getAltAlleles())) {
                if (isEmpty(s))
                    continue;
                alleles.add(Allele.create(s, false));
            }
            vcb.chr(snpData.getChromosome());
            vcb.start(snpData.getChrPosition());
            // end position derived from the reference allele length
            vcb.stop(snpData.getChrPosition() + snpData.getRefAllele().length() - 1);
            if (!isEmpty(snpData.getRsIds()) && !snpData.getRsIds().equals("none")) {
                vcb.id(snpData.getRsIds());
            }
            vcb.alleles(alleles);
            // Scores are only set when the helper returns a value.
            Float score = parseDouble(snpData.getConservationScore());
            if (score != null) {
                vcb.attribute("CONS", score);
            }
            score = parseDouble(snpData.getConservationScoreGERP());
            if (score != null) {
                vcb.attribute("GERP", score);
            }
            vcb.attribute("uaMAF", (float) snpData.getUaMAF());
            vcb.attribute("aaMAF", (float) snpData.getAaMAF());
            vcb.attribute("totalMAF", (float) snpData.getTotalMAF());
            vcb.attribute("DP", snpData.getAvgSampleReadDepth());
            out.add(vcb.make());
        }
        xmlr.close();
        xmlr = null;
        // Close here so that a failure while finishing the VCF is reported as an
        // error; clearing the reference keeps the finally block from closing twice.
        out.close();
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (Exception ignored) {
                // best-effort cleanup after a failure that was already reported
            }
        }
        CloserUtil.close(out);
    }
}
Also used : Pattern(java.util.regex.Pattern) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) JAXBContext(javax.xml.bind.JAXBContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) XMLEvent(javax.xml.stream.events.XMLEvent) XMLEventReader(javax.xml.stream.XMLEventReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) SnpData(edu.washington.gs.evs.SnpData) Unmarshaller(javax.xml.bind.Unmarshaller) VCFHeader(htsjdk.variant.vcf.VCFHeader) XMLInputFactory(javax.xml.stream.XMLInputFactory) HashSet(java.util.HashSet)

Aggregations

SnpData (edu.washington.gs.evs.SnpData)3 IOException (java.io.IOException)2 XMLEventReader (javax.xml.stream.XMLEventReader)2 XMLEvent (javax.xml.stream.events.XMLEvent)2 LocationAwareOutputStream (com.github.lindenb.jvarkit.io.LocationAwareOutputStream)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 DynamicIndexCreator (htsjdk.tribble.index.DynamicIndexCreator)1 Index (htsjdk.tribble.index.Index)1 Allele (htsjdk.variant.variantcontext.Allele)1 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)1 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)1 VCFHeader (htsjdk.variant.vcf.VCFHeader)1 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)1 VCFInfoHeaderLine (htsjdk.variant.vcf.VCFInfoHeaderLine)1 DataInputStream (java.io.DataInputStream)1 FileOutputStream (java.io.FileOutputStream)1 InputStream (java.io.InputStream)1 PrintStream (java.io.PrintStream)1 StringWriter (java.io.StringWriter)1 URL (java.net.URL)1