use of javax.xml.stream.XMLStreamReader in project aries by apache.
the class NamespaceHandlerRegistryImpl method wrapIfNeeded.
/**
 * Protects callers against namespace handlers that answer every schema
 * location request with one and the same URL.
 * <p>
 * The handler is probed with an empty namespace. If the probe yields no URL
 * (or throws), the handler is returned untouched. Otherwise it is wrapped:
 * whenever the wrapped handler returns that same URL again, the URL is only
 * passed on if the document behind it is an XML schema whose
 * {@code targetNamespace} equals the requested namespace.
 */
private static NamespaceHandler wrapIfNeeded(final NamespaceHandler handler) {
    URL probe = null;
    try {
        probe = handler.getSchemaLocation("");
    } catch (Throwable t) {
        // Ignore
    }
    if (probe == null) {
        // Well-behaved handler: nothing to fix.
        return handler;
    }
    LOGGER.warn("NamespaceHandler " + handler.getClass().getName() + " is behaving badly and should be fixed");
    final URL suspect = probe;
    return new NamespaceHandler() {
        // Remembers, per requested namespace, whether the suspect URL really serves it.
        final ConcurrentMap<String, Boolean> cache = new ConcurrentHashMap<String, Boolean>();

        @Override
        public URL getSchemaLocation(String s) {
            URL url = handler.getSchemaLocation(s);
            if (url == null || !url.equals(suspect)) {
                return url;
            }
            Boolean valid = cache.get(s);
            if (valid == null) {
                Boolean computed = isValidSchema(s, url);
                // Another thread may have stored a verdict in the meantime; its value wins.
                Boolean earlier = cache.putIfAbsent(s, computed);
                valid = (earlier != null) ? earlier : computed;
            }
            return valid ? url : null;
        }

        @Override
        public Set<Class> getManagedClasses() {
            return handler.getManagedClasses();
        }

        @Override
        public Metadata parse(Element element, ParserContext parserContext) {
            return handler.parse(element, parserContext);
        }

        @Override
        public ComponentMetadata decorate(Node node, ComponentMetadata componentMetadata, ParserContext parserContext) {
            return handler.decorate(node, componentMetadata, parserContext);
        }

        /**
         * Tells whether the document at {@code url} has an XML Schema root
         * element whose targetNamespace equals {@code ns}. Any failure while
         * opening, parsing or closing counts as "not valid".
         */
        private boolean isValidSchema(String ns, URL url) {
            try {
                InputStream is = url.openStream();
                try {
                    XMLStreamReader reader = XMLInputFactory.newFactory().createXMLStreamReader(is);
                    try {
                        // Move to the root element.
                        reader.nextTag();
                        String rootNs = reader.getNamespaceURI();
                        String rootName = reader.getLocalName();
                        boolean schemaRoot = "http://www.w3.org/2001/XMLSchema".equals(rootNs) && "schema".equals(rootName);
                        if (schemaRoot && ns.equals(reader.getAttributeValue(null, "targetNamespace"))) {
                            return true;
                        }
                    } finally {
                        reader.close();
                    }
                } finally {
                    is.close();
                }
            } catch (Throwable t) {
                // Ignore
            }
            return false;
        }
    };
}
use of javax.xml.stream.XMLStreamReader in project jackrabbit-oak by apache.
the class WikipediaImport method importWikipedia.
/**
 * Imports the pages found in the Wikipedia dump file {@code dump} below a
 * newly added "wikipedia" node under the root node of the given session.
 * <p>
 * Every {@code page} end tag creates one node carrying the most recently
 * read {@code title} and {@code text} values as properties. Unless
 * {@code flat} is set, pages are spread over intermediate nodes named after
 * a byte of a hash of the page name; the number of such levels is estimated
 * from the size of the dump file. Files whose name does not end in ".xml"
 * are read through a {@code CompressorStreamFactory}.
 * <p>
 * The XML reader and the underlying file stream are always closed before
 * this method returns, also when the import is halted or fails.
 *
 * @param session the session used to create and save the nodes
 * @return the sum of the hash codes of all imported titles and texts
 * @throws Exception if reading the dump or writing to the repository fails
 */
public int importWikipedia(Session session) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    int code = 0;
    if (doReport) {
        System.out.format("Importing %s...%n", dump);
    }
    String type = "nt:unstructured";
    if (session.getWorkspace().getNodeTypeManager().hasNodeType("oak:Unstructured")) {
        type = "oak:Unstructured";
    }
    Node wikipedia = session.getRootNode().addNode("wikipedia", type);
    int levels = 0;
    if (!flat) {
        // estimate that the average XML size of a page is about 1kB
        for (long pages = dump.length() / 1024; pages > 256; pages /= 256) {
            levels++;
        }
    }
    String title = null;
    String text = null;
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // The stream is opened here (rather than by the parser) so that it can be
    // closed deterministically; XMLStreamReader.close() does not close its input.
    java.io.InputStream in = new BufferedInputStream(new FileInputStream(dump));
    try {
        StreamSource source;
        if (dump.getName().endsWith(".xml")) {
            // Keep the system id that StreamSource(File) would have set.
            source = new StreamSource(in, dump.toURI().toASCIIString());
        } else {
            CompressorStreamFactory csf = new CompressorStreamFactory();
            in = csf.createCompressorInputStream(in);
            source = new StreamSource(in);
        }
        haltImport = false;
        XMLStreamReader reader = factory.createXMLStreamReader(source);
        try {
            while (reader.hasNext() && !haltImport) {
                switch (reader.next()) {
                    case XMLStreamConstants.START_ELEMENT:
                        if ("title".equals(reader.getLocalName())) {
                            title = reader.getElementText();
                        } else if ("text".equals(reader.getLocalName())) {
                            text = reader.getElementText();
                        }
                        break;
                    case XMLStreamConstants.END_ELEMENT:
                        if ("page".equals(reader.getLocalName())) {
                            // NOTE(review): title and text are not reset between pages, and a
                            // page lacking either before the first value is read fails on the
                            // hashCode() calls below.
                            String name = Text.escapeIllegalJcrChars(title);
                            Node parent = wikipedia;
                            if (levels > 0) {
                                int n = name.length();
                                for (int i = 0; i < levels; i++) {
                                    // Each level hashes a shorter suffix of the name to pick a bucket.
                                    int hash = name.substring(min(i, n)).hashCode();
                                    parent = JcrUtils.getOrAddNode(parent, String.format("%02x", hash & 0xff));
                                }
                            }
                            Node page = parent.addNode(name);
                            page.setProperty("title", title);
                            page.setProperty("text", text);
                            code += title.hashCode();
                            code += text.hashCode();
                            count++;
                            if (count % 1000 == 0) {
                                batchDone(session, start, count);
                            }
                            pageAdded(title, text);
                        }
                        break;
                }
            }
            // Save before releasing the input so a close failure cannot skip the save.
            session.save();
        } finally {
            reader.close();
        }
    } finally {
        in.close();
    }
    if (doReport) {
        long millis = System.currentTimeMillis() - start;
        System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000, (double) millis / count);
    }
    return code;
}
use of javax.xml.stream.XMLStreamReader in project karaf by apache.
the class MavenConfigService method getLocalRepositoryFromSettings.
/**
 * Extracts the text of the first {@code localRepository} element that has
 * character content from the given XML settings file.
 * <p>
 * The parser is switched to coalescing mode so that contiguous text (which
 * a StAX parser may otherwise deliver in several CHARACTERS events, e.g.
 * around entity references) is returned as one complete value.
 *
 * @param file the settings file to read
 * @return the trimmed element text, or {@code null} if no such element with
 *         text is found or the file cannot be read or parsed (the error is
 *         logged in that case)
 */
private static String getLocalRepositoryFromSettings(File file) {
    try (InputStream fin = new FileInputStream(file)) {
        XMLInputFactory factory = XMLInputFactory.newFactory();
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        XMLStreamReader reader = factory.createXMLStreamReader(fin);
        try {
            int event;
            // Local name of the element whose start tag was seen last and is not yet closed.
            String elementName = null;
            while ((event = reader.next()) != XMLStreamConstants.END_DOCUMENT) {
                if (event == XMLStreamConstants.START_ELEMENT) {
                    elementName = reader.getLocalName();
                } else if (event == XMLStreamConstants.END_ELEMENT) {
                    elementName = null;
                } else if (event == XMLStreamConstants.CHARACTERS && "localRepository".equals(elementName)) {
                    return reader.getText().trim();
                }
            }
        } finally {
            // Close the reader before the stream it reads from is closed.
            try {
                reader.close();
            } catch (XMLStreamException ignored) {
                // Nothing useful can be done if releasing the parser fails.
            }
        }
    } catch (Exception e) {
        logger.error("Error retrieving maven configuration", e);
    }
    return null;
}
use of javax.xml.stream.XMLStreamReader in project lucene-solr by apache.
the class AddBlockUpdateTest method testXML.
// Same scenario as testSolrJXML above, but the documents are produced by
// XMLLoader so that the XML structure of the nested documents is visible.
@Test
public void testXML() throws IOException, XMLStreamException {
    UpdateRequest req = new UpdateRequest();
    List<SolrInputDocument> docs = new ArrayList<>();

    // Parent "X" with children "y" and "z".
    String firstBlock = "<doc >"
        + " <field name=\"id\">1</field>"
        + " <field name=\"parent_s\">X</field>"
        + "<doc> "
        + " <field name=\"id\" >2</field>"
        + " <field name=\"child_s\">y</field>"
        + "</doc>"
        + "<doc> "
        + " <field name=\"id\" >3</field>"
        + " <field name=\"child_s\">z</field>"
        + "</doc>"
        + "</doc>";
    // Parent "A" with children "b" and "c".
    String secondBlock = "<doc >"
        + " <field name=\"id\">4</field>"
        + " <field name=\"parent_s\">A</field>"
        + "<doc> "
        + " <field name=\"id\" >5</field>"
        + " <field name=\"child_s\">b</field>"
        + "</doc>"
        + "<doc> "
        + " <field name=\"id\" >6</field>"
        + " <field name=\"child_s\">c</field>"
        + "</doc>"
        + "</doc>";

    //null for the processor is all right here
    XMLLoader loader = new XMLLoader();
    for (String blockXml : new String[] {firstBlock, secondBlock}) {
        XMLStreamReader parser = inputFactory.createXMLStreamReader(new StringReader(blockXml));
        // read the START document...
        parser.next();
        docs.add(loader.readDoc(parser));
    }

    Collections.shuffle(docs, random());
    req.add(docs);

    OutputStream os = new ByteArrayOutputStream();
    new RequestWriter().write(req, os);
    assertBlockU(os.toString());
    assertU(commit());

    final SolrIndexSearcher searcher = getSearcher();
    assertSingleParentOf(searcher, one("yz"), "X");
    assertSingleParentOf(searcher, one("bc"), "A");
}
use of javax.xml.stream.XMLStreamReader in project lucene-solr by apache.
the class DocumentAnalysisRequestHandler method resolveAnalysisRequest.
//================================================ Helper Methods ==================================================
/**
 * Resolves the {@link DocumentAnalysisRequest} from the given solr request.
 * <p>
 * The query is taken from the {@code AnalysisParams.QUERY} parameter, falling back to
 * {@code CommonParams.Q}. The single content stream of the request is parsed as XML and every
 * {@code doc} element found is read and added to the resulting request. The parser and the
 * content stream are released before this method returns, whether it succeeds or fails.
 *
 * @param req The solr request.
 *
 * @return The resolved document analysis request.
 *
 * @throws IOException Thrown when reading/parsing the content stream of the request fails.
 * @throws XMLStreamException Thrown when reading/parsing the content stream of the request fails.
 */
DocumentAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws IOException, XMLStreamException {
    DocumentAnalysisRequest request = new DocumentAnalysisRequest();
    SolrParams params = req.getParams();
    String query = params.get(AnalysisParams.QUERY, params.get(CommonParams.Q, null));
    request.setQuery(query);
    boolean showMatch = params.getBool(AnalysisParams.SHOW_MATCH, false);
    request.setShowMatch(showMatch);
    ContentStream stream = extractSingleContentStream(req);
    InputStream is = null;
    XMLStreamReader parser = null;
    try {
        is = stream.getStream();
        final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
        parser = (charset == null) ? inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
        while (true) {
            int event = parser.next();
            switch (event) {
                case XMLStreamConstants.END_DOCUMENT: {
                    // The parser is closed exactly once, in the finally block below.
                    return request;
                }
                case XMLStreamConstants.START_ELEMENT: {
                    String currTag = parser.getLocalName();
                    if ("doc".equals(currTag)) {
                        log.trace("Reading doc...");
                        SolrInputDocument document = readDocument(parser, req.getSchema());
                        request.addDocument(document);
                    }
                    break;
                }
            }
        }
    } finally {
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            // Runs even if closing the parser throws, so the stream cannot leak.
            IOUtils.closeQuietly(is);
        }
    }
}
Aggregations