Search in sources :

Example 1 with LocationAwareOutputStream

Use of com.github.lindenb.jvarkit.io.LocationAwareOutputStream in the project jvarkit, by lindenb.

The method doWork of the class EvsDumpXml.

/**
 * Runs the whole dump: prepares the XML helpers, builds one {@code Fetcher} per
 * chromosome, writes the {@code <evsData>} envelope to the output stream and,
 * when sorting is enabled, writes the sorted records (consecutive duplicates
 * removed) and, when writing to a file, indexes each written record.
 *
 * <p>The output goes to {@code this.outfilename} when it is set, otherwise to
 * {@code System.out}. An index is only created when writing to a file.
 *
 * <p>All text is encoded as UTF-8, matching the encoding announced in the XML
 * declaration that this method writes.
 *
 * <p>NOTE(review): what {@code Fetcher.run()} writes (directly to the stream or
 * into the sorting collection) is not visible here - confirm against Fetcher.
 *
 * @return {@code 0} on success, {@code -1} if any exception was raised
 */
private int doWork() {
    // Stream opened by THIS call and not yet closed; lets the finally block
    // release it when an exception interrupts the normal close sequence.
    LocationAwareOutputStream opened = null;
    try {
        this.xmlInputFactory = XMLInputFactory.newFactory();
        final TransformerFactory factory = TransformerFactory.newInstance();
        this.transformer = factory.newTransformer();
        this.transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        if (this.doSort) {
            this.sortingCollection = SortingCollection.newInstance(String.class, new SnpStringCodec(), new SnpDataComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            this.sortingCollection.setDestructiveIteration(true);
        }

        // Chromosome names and their lengths, in processing order.
        // "Y" (59373566) is not in EVS; "M" (16571) is not fetched either.
        final String[] contigs = {
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
            "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X"
        };
        final int[] contigLengths = {
            249250621, 243199373, 198022430, 191154276, 180915260, 171115067,
            159138663, 146364022, 141213431, 135534747, 135006516, 133851895,
            115169878, 107349540, 102531392, 90354753, 81195210, 78077248,
            59128983, 63025520, 48129895, 51304566, 155270560
        };
        final List<Fetcher> fetchers = new ArrayList<Fetcher>(24);
        for (int i = 0; i < contigs.length; i++) {
            fetchers.add(fetch(contigs[i], contigLengths[i]));
        }

        // Total size is computed up front; genome_curr_size is advanced after each fetcher.
        this.genome_total_size = 0L;
        this.genome_curr_size = 0L;
        for (Fetcher fetcher : fetchers) {
            this.genome_total_size += fetcher.length;
        }

        DynamicIndexCreator indexer = null;
        if (this.outfilename != null) {
            LOG.info("Opening " + this.outfilename);
            opened = new LocationAwareOutputStream(new FileOutputStream(this.outfilename));
            this.outputstream = opened;
            indexer = new DynamicIndexCreator(this.outfilename, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
        } else {
            opened = new LocationAwareOutputStream(System.out);
            this.outputstream = opened;
        }

        // print header
        final String xml_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<evsData xmlns=\"http://webservice.evs.gs.washington.edu/\">\n";
        // Explicit UTF-8: the header above promises it, the platform default may differ.
        this.outputstream.write(xml_header.getBytes(java.nio.charset.StandardCharsets.UTF_8));

        for (Fetcher fetcher : fetchers) {
            fetcher.run();
            this.genome_curr_size += fetcher.length;
        }

        if (this.sortingCollection != null) {
            final SnpDataBinding snpDataBinding = new SnpDataBinding();
            this.sortingCollection.doneAdding();
            String prev = null;
            final CloseableIterator<String> iter = sortingCollection.iterator();
            try {
                while (iter.hasNext()) {
                    final String s = iter.next();
                    // Skip a record identical to the one just written.
                    if (prev != null && prev.equals(s)) {
                        continue;
                    }
                    // Byte offset of the record start, captured before writing, for the index.
                    final long position = outputstream.getPosition();
                    outputstream.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                    // important SnpDataCodec needs separate lines
                    outputstream.write('\n');
                    if (indexer != null) {
                        final SnpData sd = snpDataBinding.convert(s);
                        indexer.addFeature(new SnpDataFeature(sd), position);
                    }
                    prev = s;
                }
            } finally {
                // Release the iterator even when writing or indexing fails.
                iter.close();
            }
        }

        // Position just before the footer: passed to finalizeIndex below.
        final long last_index = this.outputstream.getPosition();
        final String xml_footer = "</evsData>\n";
        this.outputstream.write(xml_footer.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        this.outputstream.flush();
        this.outputstream.close();
        // Closed normally: nothing left for the finally block to release.
        opened = null;

        if (indexer != null) {
            LOG.info("Writing index");
            final Index index = indexer.finalizeIndex(last_index);
            index.writeBasedOnFeatureFile(this.outfilename);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (opened != null) {
            try {
                opened.close();
            } catch (Exception ignored) {
                // Already on the failure path (-1 is being returned); a secondary
                // close error must not mask the original problem.
            }
        }
        if (this.sortingCollection != null) {
            this.sortingCollection.cleanup();
        }
    }
    return 0;
}
Also used : DynamicIndexCreator(htsjdk.tribble.index.DynamicIndexCreator) LocationAwareOutputStream(com.github.lindenb.jvarkit.io.LocationAwareOutputStream) TransformerFactory(javax.xml.transform.TransformerFactory) ArrayList(java.util.ArrayList) Index(htsjdk.tribble.index.Index) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) SnpData(edu.washington.gs.evs.SnpData)

Aggregations

LocationAwareOutputStream (com.github.lindenb.jvarkit.io.LocationAwareOutputStream)1 SnpData (edu.washington.gs.evs.SnpData)1 DynamicIndexCreator (htsjdk.tribble.index.DynamicIndexCreator)1 Index (htsjdk.tribble.index.Index)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 TransformerFactory (javax.xml.transform.TransformerFactory)1