use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class EvsDumpXml method fetchEvsData.
/**
 * Queries the EVS SOAP service for the region {@code chrom:start-end} and stores every
 * {@code snpList} element found in the response.
 *
 * <p>Each record is unmarshalled to a {@code SnpData}, re-marshalled as a standalone
 * {@code snpList} element and then either added to {@code sortingCollection} (when it is not
 * null) or written to {@code outputstream} followed by a newline. {@code count_records} is
 * incremented for every record; reading stops early once {@code LIMIT > 0} and
 * {@code count_records >= LIMIT}.
 *
 * <p>Any exception is printed with its stack trace and swallowed, so a failing region does not
 * abort the caller.
 *
 * @param chrom chromosome name sent in the query
 * @param start start position sent in the query
 * @param end end position sent in the query
 */
private void fetchEvsData(String chrom, int start, int end) {
    SnpDataBinding dataBinding = new SnpDataBinding();
    double ratio = 100.0 * (this.genome_curr_size + start) / (double) this.genome_total_size;
    LOG.info(chrom + ":" + start + "-" + end + " N=" + count_records + " " + (int) ratio + "%");
    try {
        URL url = new URL("http://gvs-1.gs.washington.edu/wsEVS/EVSDataQueryService");
        URLConnection conn = null;
        for (int n_try = 0; n_try < MAX_TRY; ++n_try) {
            try {
                conn = url.openConnection();
                // Stop retrying as soon as a connection object was obtained; without this
                // break the loop would always open MAX_TRY connections.
                break;
            } catch (java.net.ConnectException err) {
                if (n_try + 1 == MAX_TRY)
                    throw err;
                LOG.warning("Error: trying " + (n_try) + "/" + MAX_TRY + " " + url);
            }
        }
        // NOTE(review): URL.openConnection() is documented as not establishing the network
        // connection itself, so this retry loop may never see a ConnectException - confirm
        // whether the retry should wrap the first real I/O call instead.
        if (conn == null) {
            // Only reachable when MAX_TRY <= 0: nothing was even attempted.
            LOG.warning("Cannot open a connection to " + url);
            return;
        }
        conn.setDoOutput(true);
        // try-with-resources guarantees the streams are released even when the request,
        // the XML parsing or the JAXB (un)marshalling fails.
        try (PrintStream wr = new PrintStream(conn.getOutputStream())) {
            // Send the SOAP request
            wr.print("<?xml version='1.0' ?>" + "<S:Envelope xmlns:S='http://schemas.xmlsoap.org/soap/envelope/'>" + "<S:Body>" + "<ns2:getEvsData xmlns:ns2='http://webservice.evs.gs.washington.edu/'>" + "<arg0>");
            wr.print(chrom);
            wr.print(":");
            wr.print(String.valueOf(start));
            wr.print("-");
            wr.print(String.valueOf(end));
            wr.print("</arg0>" + "</ns2:getEvsData>" + "</S:Body>" + "</S:Envelope>");
            wr.flush();
            try (InputStream rd = conn.getInputStream()) {
                XMLEventReader xmlr = this.xmlInputFactory.createXMLEventReader(rd);
                try {
                    while (xmlr.hasNext()) {
                        XMLEvent evt = xmlr.peek();
                        // Skip everything until the reader is positioned on a <snpList> start tag.
                        if (!evt.isStartElement() || !evt.asStartElement().getName().getLocalPart().equals("snpList")) {
                            xmlr.nextEvent();
                            continue;
                        }
                        SnpData snpData = dataBinding.unmarshaller.unmarshal(xmlr, SnpData.class).getValue();
                        StringWriter sw = new StringWriter();
                        dataBinding.marshaller.marshal(new JAXBElement<SnpData>(new QName("snpList"), SnpData.class, snpData), sw);
                        if (this.sortingCollection != null) {
                            this.sortingCollection.add(sw.toString());
                        } else {
                            this.outputstream.write(sw.toString().getBytes());
                            this.outputstream.write('\n');
                        }
                        ++count_records;
                        if (LIMIT > 0 && count_records >= LIMIT)
                            break;
                    }
                } finally {
                    // XMLEventReader is not AutoCloseable, so it is closed by hand.
                    xmlr.close();
                }
            }
        }
    } catch (Exception err) {
        // Best effort: report the failure and let the caller continue with the next region.
        err.printStackTrace();
    }
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class MergeBlastXml method doWork.
/**
 * Merges several BLAST XML outputs into one document.
 *
 * <p>The first file ({@code args.get(0)}) is used as the template: its events are copied to the
 * output, except for the content of {@code BlastOutput_iterations}. Every {@code Iteration}
 * element of the first file is collected in a sorting collection. When the closing
 * {@code BlastOutput_iterations} tag is reached, the {@code Iteration} elements of all the
 * remaining files are collected too; the iterations are then sorted on their query definition,
 * the hits of iterations sharing the same query definition are appended to the first iteration
 * of the group, and that iteration is marshalled to the output.
 *
 * @param args paths of the BLAST XML files; at least one is required
 * @return 0 on success, -1 when no input was given or when any error occurred
 */
@Override
public int doWork(List<String> args) {
    if (args.isEmpty()) {
        LOG.error("input xml missing");
        return -1;
    }
    FileReader in = null;
    XMLEventReader rx = null;
    XMLEventReader rx2 = null;
    XMLEventWriter wx = null;
    SortingCollection<Iteration> sortingCollection = null;
    try {
        JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
        this.unmarshaller = jc.createUnmarshaller();
        this.marshaller = jc.createMarshaller();
        this.marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        this.marshaller.setProperty(Marshaller.JAXB_FRAGMENT, true);
        XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
        xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        // Entities are only logged and resolved to nothing.
        xmlInputFactory.setXMLResolver(new XMLResolver() {
            @Override
            public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
                LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
                return null;
            }
        });
        final Comparator<Iteration> hitComparator = (A, B) -> {
            return A.getIterationQueryDef().compareTo(B.getIterationQueryDef());
        };
        sortingCollection = SortingCollection.newInstance(Iteration.class, new BlastIterationCodec(), hitComparator, this.maxRecordsInRam, this.tmpFile.toPath());
        // Keep a handle on the FileReader: closing an XMLEventReader does not close its source.
        in = new FileReader(args.get(0));
        rx = xmlInputFactory.createXMLEventReader(in);
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        if (this.outputFile != null) {
            wx = xof.createXMLEventWriter(new StreamResult(this.outputFile));
        } else {
            wx = xof.createXMLEventWriter(new StreamResult(stdout()));
        }
        boolean in_iteration = false;
        while (rx.hasNext()) {
            final XMLEvent evt = rx.peek();
            if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Iteration")) {
                final Iteration iteration = this.unmarshaller.unmarshal(rx, Iteration.class).getValue();
                sortingCollection.add(iteration);
            } else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_iterations")) {
                wx.add(rx.nextEvent());
                in_iteration = true;
            } else if (evt.isEndElement() && evt.asEndElement().getName().getLocalPart().equals("BlastOutput_iterations")) {
                // End of the iterations of the first file: pull the iterations of the other files.
                for (int optind = 1; optind < args.size(); ++optind) {
                    LOG.info("opening " + args.get(optind));
                    try (FileReader in2 = new FileReader(args.get(optind))) {
                        rx2 = xmlInputFactory.createXMLEventReader(in2);
                        while (rx2.hasNext()) {
                            XMLEvent evt2 = rx2.peek();
                            if (evt2.isStartElement() && evt2.asStartElement().getName().getLocalPart().equals("Iteration")) {
                                final Iteration iteration = this.unmarshaller.unmarshal(rx2, Iteration.class).getValue();
                                sortingCollection.add(iteration);
                            } else {
                                rx2.nextEvent();
                            }
                        }
                        rx2.close();
                        rx2 = null;
                    }
                    LOG.info("close");
                }
                sortingCollection.doneAdding();
                sortingCollection.setDestructiveIteration(true);
                final CloseableIterator<Iteration> coliter = sortingCollection.iterator();
                final EqualRangeIterator<Iteration> eq = new EqualRangeIterator<>(coliter, hitComparator);
                try {
                    // Drive the loop with the grouping iterator itself: it has to read ahead on
                    // 'coliter' to detect the end of a group, so 'coliter' can already be
                    // exhausted while a last group is still pending.
                    while (eq.hasNext()) {
                        final List<Iteration> L = eq.next();
                        // Gather the hits of all the iterations of the group in the first one.
                        // NOTE(review): assumes getIterationHits() is never null - confirm.
                        for (int i = 1; i < L.size(); ++i) {
                            L.get(0).getIterationHits().getHit().addAll(L.get(i).getIterationHits().getHit());
                        }
                        marshaller.marshal(L.get(0), wx);
                    }
                } finally {
                    eq.close();
                    coliter.close();
                }
                sortingCollection.cleanup();
                sortingCollection = null;
                wx.add(rx.nextEvent());
                in_iteration = false;
            } else if (in_iteration) {
                // consume text
                rx.nextEvent();
            } else {
                wx.add(rx.nextEvent());
            }
        }
        wx.flush();
        wx.close();
        wx = null;
        return 0;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        // Release, on success as well as on failure, everything that is still open.
        if (rx2 != null) {
            try {
                rx2.close();
            } catch (XMLStreamException ignored) {
                // best effort while cleaning up
            }
        }
        if (rx != null) {
            try {
                rx.close();
            } catch (XMLStreamException ignored) {
                // best effort while cleaning up
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // best effort while cleaning up
            }
        }
        if (wx != null) {
            try {
                wx.close();
            } catch (XMLStreamException ignored) {
                // best effort while cleaning up
            }
        }
        // Still non-null when an error occurred or when no closing BlastOutput_iterations
        // tag was met: remove its temporary data.
        if (sortingCollection != null) {
            sortingCollection.cleanup();
        }
    }
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class Biostar3654 method fetchAnnotations.
/**
 * Downloads from the NCBI efetch service the features of a sequence restricted to a range and
 * returns them with their coordinates shifted back by {@code start - 1}.
 *
 * <p>Nothing is fetched when {@code acn} is null, empty or starts with {@code "Query"}. When
 * {@code start > end} the two bounds are swapped. Features without any interval are ignored.
 * Errors raised while fetching or parsing are logged and the features collected so far are
 * returned.
 *
 * @param database value of the efetch {@code db} parameter
 * @param acn identifier of the sequence
 * @param start first position of the requested range
 * @param end last position of the requested range
 * @return the features found, possibly empty, never null
 */
private List<INSDFeature> fetchAnnotations(final String database, final String acn, int start, int end) throws Exception {
    // Normalise the range before doing anything else.
    if (start > end) {
        return fetchAnnotations(database, acn, end, start);
    }
    final List<INSDFeature> features = new ArrayList<INSDFeature>();
    InputStream stream = null;
    XMLEventReader reader = null;
    try {
        final boolean skipped = acn == null || acn.isEmpty() || acn.startsWith("Query");
        if (!skipped) {
            final StringBuilder query = new StringBuilder();
            query.append(NcbiConstants.efetch());
            query.append("?db=").append(database);
            query.append("&id=").append(URLEncoder.encode(acn, "UTF-8"));
            query.append("&rettype=gbc&retmode=xml&seq_start=").append(start);
            query.append("&seq_stop=").append(end);
            query.append(this.ncbiApiKey.getAmpParamValue());
            final String uri = query.toString();
            LOG.info(uri);
            stream = new URL(uri).openStream();
            reader = this.xif.createXMLEventReader(stream);
            while (reader.hasNext()) {
                final XMLEvent peeked = reader.peek();
                final boolean atFeature = peeked.isStartElement()
                        && peeked.asStartElement().getName().getLocalPart().equals("INSDFeature");
                if (!atFeature) {
                    // not a feature: consume the event
                    reader.next();
                    continue;
                }
                final INSDFeature feature = this.unmarshaller.unmarshal(reader, INSDFeature.class).getValue();
                final INSDFeatureIntervals intervals = feature.getINSDFeatureIntervals();
                if (intervals == null || intervals.getINSDInterval().isEmpty()) {
                    continue;
                }
                // when using seq_start and seq_stop, the NCBI shifts the data...
                for (final INSDInterval interval : intervals.getINSDInterval()) {
                    if (interval.getINSDIntervalFrom() != null && interval.getINSDIntervalTo() != null) {
                        final int from = Integer.parseInt(interval.getINSDIntervalFrom()) + start - 1;
                        interval.setINSDIntervalFrom(String.valueOf(from));
                        final int to = Integer.parseInt(interval.getINSDIntervalTo()) + start - 1;
                        interval.setINSDIntervalTo(String.valueOf(to));
                    } else if (interval.getINSDIntervalPoint() != null) {
                        final int point = Integer.parseInt(interval.getINSDIntervalPoint()) + start - 1;
                        interval.setINSDIntervalPoint(String.valueOf(point));
                    }
                }
                features.add(feature);
            }
        }
    } catch (Exception err) {
        LOG.error(err);
    } finally {
        CloserUtil.close(reader);
        CloserUtil.close(stream);
    }
    LOG.info("N(INSDFeature)=" + features.size());
    // nothing found: the list is simply empty
    return features;
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class Biostar3654 method parseBlast.
/**
 * Parses a BLAST XML output.
 *
 * <p>The content of {@code BlastOutput_program} selects the database used for the following
 * iterations ({@code "nucleotide"} for blastn, {@code "protein"} for blastp; the default is
 * {@code "nucleotide"}). Every {@code Iteration} element is unmarshalled and handed to
 * {@code parseIteration} together with the current database. All other events are skipped.
 *
 * @param r the reader positioned on the BLAST XML document
 * @throws IOException if the program is neither blastn nor blastp
 * @throws Exception on any XML or unmarshalling error
 */
private void parseBlast(XMLEventReader r) throws Exception {
    String database = "nucleotide";
    while (r.hasNext()) {
        XMLEvent evt = r.peek();
        if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("BlastOutput_program")) {
            // Consume the start tag so that getElementText() can read its text content.
            r.next();
            String program = r.getElementText();
            if ("blastn".equals(program)) {
                database = "nucleotide";
            } else if ("blastp".equals(program)) {
                database = "protein";
            } else {
                // Report the offending program; the message previously named blastn twice and
                // printed the database instead.
                throw new IOException("only blastn && blastp are supported: " + program);
            }
        } else if (evt.isStartElement() && evt.asStartElement().getName().getLocalPart().equals("Iteration")) {
            Iteration iteration = this.unmarshaller.unmarshal(r, Iteration.class).getValue();
            parseIteration(database, iteration);
        } else {
            // consume the event
            r.next();
        }
    }
}
use of javax.xml.stream.events.XMLEvent in project jvarkit by lindenb.
the class GenbankToGff3 method parseGenBank.
/**
 * Reads a GenBank XML stream.
 *
 * <p>Each {@code GBFeature} element is unmarshalled and wrapped in a {@code MyFeature}.
 * Features without intervals are dropped; features whose {@code locus_tag} qualifier is blank
 * are printed at once, the others are added to {@code buffer}. {@code dump()} is called at
 * every {@code GBSeq} start tag and once more when the stream is exhausted.
 *
 * @param r the reader positioned on the GenBank XML document
 */
private void parseGenBank(final XMLEventReader r) throws JAXBException, XMLStreamException, IOException {
    while (r.hasNext()) {
        final XMLEvent peeked = r.peek();
        // Local name of the upcoming start tag, or null for any other kind of event.
        final String tag = peeked.isStartElement()
                ? peeked.asStartElement().getName().getLocalPart()
                : null;
        if ("GBFeature".equals(tag)) {
            final GBFeature gbFeature = this.unmarshaller.unmarshal(r, GBFeature.class).getValue();
            final MyFeature wrapped = new MyFeature(gbFeature);
            if (wrapped.getIntervals().isEmpty()) {
                continue;
            }
            // things like SNP carry no locus_tag and can be printed right now...
            if (!StringUtil.isBlank(wrapped.findQualifier("locus_tag"))) {
                this.buffer.add(wrapped);
            } else {
                wrapped.print();
            }
        } else if ("GBSeq".equals(tag)) {
            r.nextEvent();
            dump();
        } else {
            // consume the event
            r.nextEvent();
        }
    }
    dump();
}
Aggregations