Use of htsjdk.tribble.index.DynamicIndexCreator in project jvarkit by lindenb.
Class EvsDumpXml, method doWork.
/**
 * Runs the whole dump: creates one {@code Fetcher} per chromosome, writes an
 * {@code <evsData>} XML document to {@code outfilename} (or to standard output
 * when no file name is set) and, when writing to a file, builds and saves a
 * tribble index next to it.
 *
 * <p>When {@code doSort} is set, records are accumulated in a
 * {@code SortingCollection}, then written in sorted order with consecutive
 * duplicates removed.
 *
 * <p>All text is written as UTF-8, matching the encoding declared in the XML
 * header.
 *
 * @return 0 on success, -1 if any exception was thrown (its stack trace is printed)
 */
private int doWork() {
    // The XML declaration below announces UTF-8, so never rely on the platform default charset.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    // Track the stream's lifecycle locally so the finally block only closes what this call opened.
    boolean outputOpened = false;
    boolean outputClosed = false;
    try {
        this.xmlInputFactory = XMLInputFactory.newFactory();
        TransformerFactory factory = TransformerFactory.newInstance();
        this.transformer = factory.newTransformer();
        this.transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        if (this.doSort) {
            this.sortingCollection = SortingCollection.newInstance(String.class, new SnpStringCodec(), new SnpDataComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            this.sortingCollection.setDestructiveIteration(true);
        }
        // One fetcher per chromosome; the number is read back below as fetcher.length
        // (presumably the chromosome length in bases -- confirm against fetch()).
        final List<Fetcher> fetchers = new ArrayList<Fetcher>(24);
        fetchers.add(fetch("1", 249250621));
        fetchers.add(fetch("2", 243199373));
        fetchers.add(fetch("3", 198022430));
        fetchers.add(fetch("4", 191154276));
        fetchers.add(fetch("5", 180915260));
        fetchers.add(fetch("6", 171115067));
        fetchers.add(fetch("7", 159138663));
        fetchers.add(fetch("8", 146364022));
        fetchers.add(fetch("9", 141213431));
        fetchers.add(fetch("10", 135534747));
        fetchers.add(fetch("11", 135006516));
        fetchers.add(fetch("12", 133851895));
        fetchers.add(fetch("13", 115169878));
        fetchers.add(fetch("14", 107349540));
        fetchers.add(fetch("15", 102531392));
        fetchers.add(fetch("16", 90354753));
        fetchers.add(fetch("17", 81195210));
        fetchers.add(fetch("18", 78077248));
        fetchers.add(fetch("19", 59128983));
        fetchers.add(fetch("20", 63025520));
        fetchers.add(fetch("21", 48129895));
        fetchers.add(fetch("22", 51304566));
        fetchers.add(fetch("X", 155270560));
        // fetch("Y",59373566); not in evs
        // fetch("M",16571);

        // Total of all fetcher lengths, and the running total of completed fetchers.
        this.genome_total_size = 0L;
        this.genome_curr_size = 0L;
        for (Fetcher fetcher : fetchers) {
            this.genome_total_size += fetcher.length;
        }

        // An index is only built when writing to a real file.
        DynamicIndexCreator indexer = null;
        if (this.outfilename != null) {
            LOG.info("Opening " + this.outfilename);
            this.outputstream = new LocationAwareOutputStream(new FileOutputStream(this.outfilename));
            outputOpened = true;
            indexer = new DynamicIndexCreator(this.outfilename, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
        } else {
            this.outputstream = new LocationAwareOutputStream(System.out);
            outputOpened = true;
        }

        // print header
        final String xml_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<evsData xmlns=\"http://webservice.evs.gs.washington.edu/\">\n";
        this.outputstream.write(xml_header.getBytes(utf8));

        for (Fetcher fetcher : fetchers) {
            fetcher.run();
            this.genome_curr_size += fetcher.length;
        }

        // NOTE(review): features are only added to the index in this sorted branch;
        // without doSort the index written below contains no features.
        if (this.sortingCollection != null) {
            SnpDataBinding snpDataBinding = new SnpDataBinding();
            this.sortingCollection.doneAdding();
            String prev = null;
            CloseableIterator<String> iter = sortingCollection.iterator();
            try {
                while (iter.hasNext()) {
                    String s = iter.next();
                    // Input is sorted, so duplicates are adjacent: skip a record equal to the previous one.
                    if (prev != null && prev.equals(s)) {
                        continue;
                    }
                    // Byte offset of this record, captured before writing, is what the index stores.
                    long position = outputstream.getPosition();
                    outputstream.write(s.getBytes(utf8));
                    // important SnpDataCodec needs separate lines
                    outputstream.write('\n');
                    if (indexer != null) {
                        SnpData sd = snpDataBinding.convert(s);
                        indexer.addFeature(new SnpDataFeature(sd), position);
                    }
                    prev = s;
                }
            } finally {
                // Release the iterator even when writing or indexing fails.
                iter.close();
            }
        }

        // Offset where the records end (just before the footer); passed to the index finalizer.
        long last_index = this.outputstream.getPosition();
        final String xml_footer = "</evsData>\n";
        this.outputstream.write(xml_footer.getBytes(utf8));
        this.outputstream.flush();
        // Mark as closed before calling close() so a failing close() is not retried in finally.
        outputClosed = true;
        this.outputstream.close();

        if (indexer != null) {
            LOG.info("Writing index");
            final Index index = indexer.finalizeIndex(last_index);
            index.writeBasedOnFeatureFile(this.outfilename);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        // On the failure path the stream was never closed: do it now so the file handle is not leaked.
        if (outputOpened && !outputClosed) {
            try {
                this.outputstream.close();
            } catch (Exception ignored) {
                // Best effort: the original failure has already been reported above.
            }
        }
        if (this.sortingCollection != null) {
            this.sortingCollection.cleanup();
        }
    }
    return 0;
}
Aggregations