use of edu.washington.gs.evs.SnpData in project jvarkit by lindenb.
the class EvsDumpXml method fetchEvsData.
/**
 * Queries the EVS SOAP web service for the region {@code chrom:start-end} and
 * stores every {@code snpList} element found in the response.
 *
 * Each record is re-marshalled to an XML string and either added to
 * {@code sortingCollection} (when it is not null) or written, followed by a
 * newline, to {@code outputstream}. {@code count_records} is incremented per
 * record and reading stops once {@code LIMIT} (when positive) is reached.
 *
 * Any failure is reported and swallowed so the caller can go on with the
 * next region.
 *
 * @param chrom chromosome name inserted in the SOAP request
 * @param start region start inserted in the SOAP request
 * @param end   region end inserted in the SOAP request
 */
private void fetchEvsData(String chrom, int start, int end) {
    SnpDataBinding dataBinding = new SnpDataBinding();
    double ratio = 100.0 * (this.genome_curr_size + start) / (double) this.genome_total_size;
    LOG.info(chrom + ":" + start + "-" + end + " N=" + count_records + " " + (int) ratio + "%");
    PrintStream wr = null;
    InputStream rd = null;
    XMLEventReader xmlr = null;
    try {
        URL url = new URL("http://gvs-1.gs.washington.edu/wsEVS/EVSDataQueryService");
        // Send data
        URLConnection conn = null;
        for (int n_try = 0; n_try < MAX_TRY; ++n_try) {
            try {
                // openConnection() alone does not touch the network: connect()
                // is called here so that a refused connection is really retried.
                URLConnection candidate = url.openConnection();
                candidate.setDoOutput(true);
                candidate.connect();
                conn = candidate;
                // stop at the first successful attempt
                break;
            } catch (java.net.ConnectException err) {
                if (n_try + 1 == MAX_TRY)
                    throw err;
                LOG.warning("Error: trying " + (n_try) + "/" + MAX_TRY + " " + url);
            }
        }
        if (conn == null) {
            // only reachable when the loop body never ran (MAX_TRY <= 0)
            throw new IllegalStateException("No connection attempt made to " + url + " (MAX_TRY=" + MAX_TRY + ")");
        }
        wr = new PrintStream(conn.getOutputStream());
        wr.print("<?xml version='1.0' ?>" + "<S:Envelope xmlns:S='http://schemas.xmlsoap.org/soap/envelope/'>" + "<S:Body>" + "<ns2:getEvsData xmlns:ns2='http://webservice.evs.gs.washington.edu/'>" + "<arg0>");
        wr.print(chrom);
        wr.print(":");
        wr.print(String.valueOf(start));
        wr.print("-");
        wr.print(String.valueOf(end));
        wr.print("</arg0>" + "</ns2:getEvsData>" + "</S:Body>" + "</S:Envelope>");
        wr.flush();
        rd = conn.getInputStream();
        xmlr = this.xmlInputFactory.createXMLEventReader(rd);
        while (xmlr.hasNext()) {
            // peek so that the start tag is still available to the unmarshaller
            XMLEvent evt = xmlr.peek();
            if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                xmlr.nextEvent();
                continue;
            }
            SnpData snpData = dataBinding.unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
            StringWriter sw = new StringWriter();
            dataBinding.marshaller.marshal(new JAXBElement<SnpData>(new QName("snpList"), SnpData.class, snpData), sw);
            if (this.sortingCollection != null) {
                this.sortingCollection.add(sw.toString());
            } else {
                // explicit UTF-8 instead of the platform default charset
                this.outputstream.write(sw.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
                this.outputstream.write('\n');
            }
            ++count_records;
            if (LIMIT > 0 && count_records >= LIMIT)
                break;
        }
    } catch (Exception err) {
        LOG.warning("Cannot fetch " + chrom + ":" + start + "-" + end + " : " + err);
        err.printStackTrace();
    } finally {
        // release everything even when the request or the parsing failed
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (Exception ignored) {
                // best effort: nothing more can be done with this reader
            }
        }
        if (wr != null) {
            wr.close();
        }
        if (rd != null) {
            try {
                rd.close();
            } catch (Exception ignored) {
                // best effort: the stream is being discarded anyway
            }
        }
    }
}
use of edu.washington.gs.evs.SnpData in project jvarkit by lindenb.
the class EvsDumpXml method doWork.
/**
 * Runs the whole dump: prepares the XML tooling, runs one fetcher per
 * chromosome, then writes the XML document (header, records, footer) to
 * {@code outfilename} or to standard output when no file name is set.
 *
 * When {@code doSort} is set, records are first accumulated in a sorting
 * collection, then written in sorted order with consecutive duplicates
 * removed; if an output file is used, each written record is also added to
 * an index that is saved at the end.
 *
 * @return 0 on success, -1 if any exception occurred
 */
private int doWork() {
    boolean outputOpened = false;
    boolean outputClosed = false;
    try {
        this.xmlInputFactory = XMLInputFactory.newFactory();
        // the parsed documents come from a remote server: never resolve external entities
        this.xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        TransformerFactory factory = TransformerFactory.newInstance();
        this.transformer = factory.newTransformer();
        this.transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        if (this.doSort) {
            this.sortingCollection = SortingCollection.newInstance(String.class, new SnpStringCodec(), new SnpDataComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            this.sortingCollection.setDestructiveIteration(true);
        }
        // chromosome names and lengths to scan ("Y" and "M" are not queried)
        final String[] contigNames = {
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
            "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X"
        };
        final int[] contigLengths = {
            249250621, 243199373, 198022430, 191154276, 180915260, 171115067,
            159138663, 146364022, 141213431, 135534747, 135006516, 133851895,
            115169878, 107349540, 102531392, 90354753, 81195210, 78077248,
            59128983, 63025520, 48129895, 51304566, 155270560
        };
        final List<Fetcher> fetchers = new ArrayList<Fetcher>(contigNames.length);
        for (int i = 0; i < contigNames.length; ++i) {
            fetchers.add(fetch(contigNames[i], contigLengths[i]));
        }
        // total size is only used to report progress
        this.genome_total_size = 0L;
        this.genome_curr_size = 0L;
        for (Fetcher fetcher : fetchers) {
            this.genome_total_size += fetcher.length;
        }
        DynamicIndexCreator indexer = null;
        if (this.outfilename != null) {
            LOG.info("Opening " + this.outfilename);
            this.outputstream = new LocationAwareOutputStream(new FileOutputStream(this.outfilename));
            outputOpened = true;
            indexer = new DynamicIndexCreator(this.outfilename, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
        } else {
            this.outputstream = new LocationAwareOutputStream(System.out);
            outputOpened = true;
        }
        // print header
        final String xml_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<evsData xmlns=\"http://webservice.evs.gs.washington.edu/\">\n";
        // the header declares UTF-8, so bytes are always produced with that charset
        this.outputstream.write(xml_header.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        for (Fetcher fetcher : fetchers) {
            fetcher.run();
            this.genome_curr_size += fetcher.length;
        }
        // NOTE(review): features are only added to the index in the sorted branch
        // below; without sorting the index is finalized with no feature - confirm this is intended.
        if (this.sortingCollection != null) {
            SnpDataBinding snpDataBinding = new SnpDataBinding();
            this.sortingCollection.doneAdding();
            String prev = null;
            CloseableIterator<String> iter = sortingCollection.iterator();
            try {
                while (iter.hasNext()) {
                    String s = iter.next();
                    // records are sorted: identical ones are adjacent, keep the first only
                    if (prev != null && prev.equals(s)) {
                        continue;
                    }
                    // offset of the record in the output, needed by the index
                    long position = outputstream.getPosition();
                    outputstream.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                    // important SnpDataCodec needs separate lines
                    outputstream.write('\n');
                    if (indexer != null) {
                        SnpData sd = snpDataBinding.convert(s);
                        indexer.addFeature(new SnpDataFeature(sd), position);
                    }
                    prev = s;
                }
            } finally {
                // close the iterator even if writing or indexing failed
                iter.close();
            }
        }
        long last_index = this.outputstream.getPosition();
        final String xml_footer = "</evsData>\n";
        this.outputstream.write(xml_footer.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        this.outputstream.flush();
        this.outputstream.close();
        outputClosed = true;
        if (indexer != null) {
            LOG.info("Writing index");
            final Index index = indexer.finalizeIndex(last_index);
            index.writeBasedOnFeatureFile(this.outfilename);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (outputOpened && !outputClosed) {
            // failure path: do not leak the stream opened above
            try {
                this.outputstream.close();
            } catch (Exception ignored) {
                // the original error has already been reported
            }
        }
        if (this.sortingCollection != null)
            this.sortingCollection.cleanup();
    }
    return 0;
}
use of edu.washington.gs.evs.SnpData in project jvarkit by lindenb.
the class EvsToVcf method doWork.
/**
 * Reads {@code snpList} XML elements from standard input and writes one VCF
 * record per element to standard output.
 *
 * The VCF header carries a fixed sequence dictionary (1-22, X, Y, MT) and
 * the INFO fields CONS, GERP, uaMAF, aaMAF, totalMAF and DP.
 *
 * @param args command-line arguments; must be empty
 * @return 0 on success, -1 when arguments are given or on any error
 */
@Override
public int doWork(List<String> args) {
    VariantContextWriter out = null;
    XMLEventReader xmlr = null;
    try {
        if (!args.isEmpty()) {
            LOG.error("Illegal number of arguments");
            return -1;
        }
        JAXBContext jc = JAXBContext.newInstance(SnpData.class);
        Unmarshaller unmarshaller = jc.createUnmarshaller();
        out = VCFUtils.createVariantContextWriterToStdout();
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        final String[] contigNames = {
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
            "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "MT"
        };
        final int[] contigLengths = {
            249250621, 243199373, 198022430, 191154276, 180915260, 171115067,
            159138663, 146364022, 141213431, 135534747, 135006516, 133851895,
            115169878, 107349540, 102531392, 90354753, 81195210, 78077248,
            59128983, 63025520, 48129895, 51304566, 155270560, 59373566, 16569
        };
        for (int i = 0; i < contigNames.length; ++i) {
            _fillDict(dict, contigNames[i], contigLengths[i]);
        }
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        // every INFO field holds exactly one value: use the fixed-count constructor
        // (the count-type constructor with INTEGER carries no actual count)
        header.addMetaDataLine(new VCFInfoHeaderLine("CONS", 1, VCFHeaderLineType.Float, "conservationScore"));
        header.addMetaDataLine(new VCFInfoHeaderLine("GERP", 1, VCFHeaderLineType.Float, "conservationScoreGERP"));
        // descriptions name the SnpData property each field is read from
        header.addMetaDataLine(new VCFInfoHeaderLine("uaMAF", 1, VCFHeaderLineType.Float, "uaMAF"));
        header.addMetaDataLine(new VCFInfoHeaderLine("aaMAF", 1, VCFHeaderLineType.Float, "aaMAF"));
        header.addMetaDataLine(new VCFInfoHeaderLine("totalMAF", 1, VCFHeaderLineType.Float, "totalMAF"));
        header.addMetaDataLine(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "avgSampleReadDepth"));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkVersion", HtsjdkVersion.getVersion()));
        header.addMetaDataLine(new VCFHeaderLine(getClass().getSimpleName() + "HtsJdkHome", HtsjdkVersion.getHome()));
        out.writeHeader(header);
        Pattern comma = Pattern.compile("[,]");
        XMLInputFactory xif = XMLInputFactory.newFactory();
        xif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false);
        // stdin is external data: never resolve external entities
        xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        xmlr = xif.createXMLEventReader(System.in);
        // stop early when stdout is broken (e.g. closed pipe)
        while (xmlr.hasNext() && !System.out.checkError()) {
            // peek so that the start tag is still available to the unmarshaller
            XMLEvent evt = xmlr.peek();
            if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                xmlr.nextEvent();
                continue;
            }
            SnpData snpData = unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
            VariantContextBuilder vcb = new VariantContextBuilder();
            // insertion-ordered set: ALT alleles keep the order of the input
            Set<Allele> alleles = new java.util.LinkedHashSet<Allele>();
            alleles.add(Allele.create(snpData.getRefAllele(), true));
            if (snpData.getAltAlleles() != null) {
                for (String s : comma.split(snpData.getAltAlleles())) {
                    if (isEmpty(s))
                        continue;
                    alleles.add(Allele.create(s, false));
                }
            }
            vcb.chr(snpData.getChromosome());
            vcb.start(snpData.getChrPosition());
            // end position derived from the length of the reference allele
            vcb.stop(snpData.getChrPosition() + snpData.getRefAllele().length() - 1);
            if (!isEmpty(snpData.getRsIds()) && !snpData.getRsIds().equals("none")) {
                vcb.id(snpData.getRsIds());
            }
            vcb.alleles(alleles);
            // conservation scores are only set when parseDouble returns a value
            Float d = parseDouble(snpData.getConservationScore());
            if (d != null) {
                vcb.attribute("CONS", d);
            }
            d = parseDouble(snpData.getConservationScoreGERP());
            if (d != null) {
                vcb.attribute("GERP", d);
            }
            vcb.attribute("uaMAF", (float) snpData.getUaMAF());
            vcb.attribute("aaMAF", (float) snpData.getAaMAF());
            vcb.attribute("totalMAF", (float) snpData.getTotalMAF());
            vcb.attribute("DP", snpData.getAvgSampleReadDepth());
            out.add(vcb.make());
        }
        xmlr.close();
        xmlr = null;
        out.close();
        // already closed: prevents a second close in the finally block
        out = null;
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (Exception ignored) {
                // best effort: the primary error has already been reported
            }
        }
        CloserUtil.close(out);
    }
}
Aggregations