Search in sources :

Example 46 with XMLStreamReader

use of javax.xml.stream.XMLStreamReader in project aries by apache.

the class NamespaceHandlerRegistryImpl method wrapIfNeeded.

/**
 * Guards against namespace handlers that answer every schema-location
 * request with one and the same URL.
 * <p>
 * The handler is probed with an empty namespace. A non-null answer marks it
 * as misbehaving; in that case a delegating wrapper is returned which only
 * hands out that URL when the document behind it is an XML schema whose
 * {@code targetNamespace} equals the requested namespace. A handler that
 * returns {@code null} (or throws) on the probe is returned untouched.
 */
private static NamespaceHandler wrapIfNeeded(final NamespaceHandler handler) {
    URL probe = null;
    try {
        probe = handler.getSchemaLocation("");
    } catch (Throwable t) {
        // Ignore: a failing probe is treated the same as a null answer
    }
    if (probe == null) {
        return handler;
    }
    LOGGER.warn("NamespaceHandler " + handler.getClass().getName() + " is behaving badly and should be fixed");
    final URL suspectUrl = probe;
    return new NamespaceHandler() {

        // Per-namespace result of the schema check, so each namespace is normally verified once.
        final ConcurrentMap<String, Boolean> verdicts = new ConcurrentHashMap<String, Boolean>();

        @Override
        public URL getSchemaLocation(String s) {
            URL url = handler.getSchemaLocation(s);
            if (url == null || !url.equals(suspectUrl)) {
                // Not the catch-all answer: pass it through unchanged.
                return url;
            }
            Boolean verdict = verdicts.get(s);
            if (verdict == null) {
                Boolean computed = isValidSchema(s, url);
                // If another thread stored a verdict first, use theirs.
                Boolean earlier = verdicts.putIfAbsent(s, computed);
                verdict = (earlier != null) ? earlier : computed;
            }
            return verdict ? url : null;
        }

        @Override
        public Set<Class> getManagedClasses() {
            return handler.getManagedClasses();
        }

        @Override
        public Metadata parse(Element element, ParserContext parserContext) {
            return handler.parse(element, parserContext);
        }

        @Override
        public ComponentMetadata decorate(Node node, ComponentMetadata componentMetadata, ParserContext parserContext) {
            return handler.decorate(node, componentMetadata, parserContext);
        }

        /**
         * Checks whether the document at {@code url} has an XML Schema
         * {@code schema} root element whose {@code targetNamespace} equals
         * {@code ns}. Any failure while reading counts as "not valid".
         */
        private boolean isValidSchema(String ns, URL url) {
            try {
                InputStream in = url.openStream();
                try {
                    XMLStreamReader xml = XMLInputFactory.newFactory().createXMLStreamReader(in);
                    try {
                        xml.nextTag();
                        String rootNamespace = xml.getNamespaceURI();
                        String rootName = xml.getLocalName();
                        if (!"http://www.w3.org/2001/XMLSchema".equals(rootNamespace) || !"schema".equals(rootName)) {
                            return false;
                        }
                        String declared = xml.getAttributeValue(null, "targetNamespace");
                        return ns.equals(declared);
                    } finally {
                        xml.close();
                    }
                } finally {
                    in.close();
                }
            } catch (Throwable t) {
                // Ignore: falls through to the negative answer below
            }
            return false;
        }
    };
}
Also used : XMLStreamReader(javax.xml.stream.XMLStreamReader) InputStream(java.io.InputStream) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) ConcurrentMap(java.util.concurrent.ConcurrentMap) ComponentMetadata(org.osgi.service.blueprint.reflect.ComponentMetadata) URL(java.net.URL) NamespaceHandler(org.apache.aries.blueprint.NamespaceHandler) ParserContext(org.apache.aries.blueprint.ParserContext)

Example 47 with XMLStreamReader

use of javax.xml.stream.XMLStreamReader in project jackrabbit-oak by apache.

the class WikipediaImport method importWikipedia.

/**
 * Reads the XML dump file and stores every {@code page} element as a node
 * below a newly added "wikipedia" node under the session's root node.
 * <p>
 * Each page node gets a "title" and a "text" property taken from the most
 * recently read {@code title} and {@code text} elements. Unless the import is
 * flat, pages are spread over intermediate nodes named after hash bytes of the
 * page name. The session is saved once parsing has finished.
 * <p>
 * The XML reader, and the file stream opened for non-".xml" (compressed)
 * dumps, are closed on every exit path.
 *
 * @param session the JCR session to import into
 * @return the sum of the hash codes of all imported titles and texts
 * @throws Exception if reading the dump or writing to the repository fails
 */
public int importWikipedia(Session session) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    int code = 0;
    if (doReport) {
        System.out.format("Importing %s...%n", dump);
    }
    String type = "nt:unstructured";
    if (session.getWorkspace().getNodeTypeManager().hasNodeType("oak:Unstructured")) {
        type = "oak:Unstructured";
    }
    Node wikipedia = session.getRootNode().addNode("wikipedia", type);
    int levels = 0;
    if (!flat) {
        // estimate that the average XML size of a page is about 1kB
        for (long pages = dump.length() / 1024; pages > 256; pages /= 256) {
            levels++;
        }
    }
    String title = null;
    String text = null;
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // Resources opened by this method; released in the finally block below.
    FileInputStream dumpStream = null;
    XMLStreamReader reader = null;
    try {
        StreamSource source;
        if (dump.getName().endsWith(".xml")) {
            source = new StreamSource(dump);
        } else {
            CompressorStreamFactory csf = new CompressorStreamFactory();
            dumpStream = new FileInputStream(dump);
            source = new StreamSource(csf.createCompressorInputStream(new BufferedInputStream(dumpStream)));
        }
        haltImport = false;
        reader = factory.createXMLStreamReader(source);
        while (reader.hasNext() && !haltImport) {
            switch(reader.next()) {
                case XMLStreamConstants.START_ELEMENT:
                    if ("title".equals(reader.getLocalName())) {
                        title = reader.getElementText();
                    } else if ("text".equals(reader.getLocalName())) {
                        text = reader.getElementText();
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    if ("page".equals(reader.getLocalName())) {
                        String name = Text.escapeIllegalJcrChars(title);
                        Node parent = wikipedia;
                        if (levels > 0) {
                            // one intermediate node per level, named after the low byte
                            // of the hash of the name's suffix starting at that level
                            int n = name.length();
                            for (int i = 0; i < levels; i++) {
                                int hash = name.substring(min(i, n)).hashCode();
                                parent = JcrUtils.getOrAddNode(parent, String.format("%02x", hash & 0xff));
                            }
                        }
                        Node page = parent.addNode(name);
                        page.setProperty("title", title);
                        page.setProperty("text", text);
                        code += title.hashCode();
                        code += text.hashCode();
                        count++;
                        if (count % 1000 == 0) {
                            batchDone(session, start, count);
                        }
                        pageAdded(title, text);
                    }
                    break;
            }
        }
    } finally {
        // XMLStreamReader.close() does not close the underlying input,
        // so the file stream has to be closed separately.
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (dumpStream != null) {
                dumpStream.close();
            }
        }
    }
    session.save();
    if (doReport) {
        long millis = System.currentTimeMillis() - start;
        System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000, (double) millis / count);
    }
    return code;
}
Also used : XMLStreamReader(javax.xml.stream.XMLStreamReader) BufferedInputStream(java.io.BufferedInputStream) Node(javax.jcr.Node) StreamSource(javax.xml.transform.stream.StreamSource) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) XMLInputFactory(javax.xml.stream.XMLInputFactory) FileInputStream(java.io.FileInputStream)

Example 48 with XMLStreamReader

use of javax.xml.stream.XMLStreamReader in project karaf by apache.

the class MavenConfigService method getLocalRepositoryFromSettings.

/**
 * Scans the given XML file for a {@code localRepository} element and returns
 * the first character data found directly inside it.
 * <p>
 * NOTE(review): the file is presumably a Maven settings.xml; confirm against callers.
 *
 * @param file the XML file to scan
 * @return the trimmed text, or {@code null} when no such text is found or the
 *         file cannot be read or parsed (the failure is logged)
 */
private static String getLocalRepositoryFromSettings(File file) {
    XMLStreamReader reader = null;
    try (InputStream fin = new FileInputStream(file)) {
        XMLInputFactory factory = XMLInputFactory.newFactory();
        // Without coalescing a parser may deliver one text node as several
        // CHARACTERS events (e.g. around entity references), and only the
        // first fragment would be returned below.
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        reader = factory.createXMLStreamReader(fin);
        int event;
        // Local name of the element whose start tag was seen last; reset on any end tag.
        String elementName = null;
        while ((event = reader.next()) != XMLStreamConstants.END_DOCUMENT) {
            if (event == XMLStreamConstants.START_ELEMENT) {
                elementName = reader.getLocalName();
            } else if (event == XMLStreamConstants.END_ELEMENT) {
                elementName = null;
            } else if (event == XMLStreamConstants.CHARACTERS && "localRepository".equals(elementName)) {
                return reader.getText().trim();
            }
        }
    } catch (Exception e) {
        logger.error("Error retrieving maven configuration", e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (XMLStreamException e) {
                // Ignore: the result has already been determined
            }
        }
    }
    return null;
}
Also used : XMLStreamReader(javax.xml.stream.XMLStreamReader) XMLStreamException(javax.xml.stream.XMLStreamException) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileInputStream(java.io.FileInputStream) XMLStreamException(javax.xml.stream.XMLStreamException)

Example 49 with XMLStreamReader

use of javax.xml.stream.XMLStreamReader in project lucene-solr by apache.

the class AddBlockUpdateTest method testXML.

// Same scenario as testSolrJXML above, except that the documents are built
// by the XMLLoader from raw XML, which makes the nested structure visible.
@Test
public void testXML() throws IOException, XMLStreamException {
    // Parent "X" with the two children "y" and "z".
    String parentXml1 = "<doc >"
        + "  <field name=\"id\">1</field>"
        + "  <field name=\"parent_s\">X</field>"
        + "<doc>  "
        + "  <field name=\"id\" >2</field>"
        + "  <field name=\"child_s\">y</field>"
        + "</doc>"
        + "<doc>  "
        + "  <field name=\"id\" >3</field>"
        + "  <field name=\"child_s\">z</field>"
        + "</doc>"
        + "</doc>";
    // Parent "A" with the two children "b" and "c".
    String parentXml2 = "<doc >"
        + "  <field name=\"id\">4</field>"
        + "  <field name=\"parent_s\">A</field>"
        + "<doc>  "
        + "  <field name=\"id\" >5</field>"
        + "  <field name=\"child_s\">b</field>"
        + "</doc>"
        + "<doc>  "
        + "  <field name=\"id\" >6</field>"
        + "  <field name=\"child_s\">c</field>"
        + "</doc>"
        + "</doc>";

    XMLStreamReader firstParser = inputFactory.createXMLStreamReader(new StringReader(parentXml1));
    // step off the initial start-of-document position
    firstParser.next();
    // a single loader instance reads both documents
    XMLLoader xmlLoader = new XMLLoader();
    SolrInputDocument firstBlock = xmlLoader.readDoc(firstParser);

    XMLStreamReader secondParser = inputFactory.createXMLStreamReader(new StringReader(parentXml2));
    // step off the initial start-of-document position
    secondParser.next();
    SolrInputDocument secondBlock = xmlLoader.readDoc(secondParser);

    List<SolrInputDocument> blocks = new ArrayList<>();
    blocks.add(firstBlock);
    blocks.add(secondBlock);
    Collections.shuffle(blocks, random());

    UpdateRequest updateRequest = new UpdateRequest();
    updateRequest.add(blocks);

    // Serialize the request and send the serialized form as the update.
    OutputStream out = new ByteArrayOutputStream();
    new RequestWriter().write(updateRequest, out);
    assertBlockU(out.toString());
    assertU(commit());

    final SolrIndexSearcher indexSearcher = getSearcher();
    assertSingleParentOf(indexSearcher, one("yz"), "X");
    assertSingleParentOf(indexSearcher, one("bc"), "A");
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) XMLStreamReader(javax.xml.stream.XMLStreamReader) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) OutputStream(java.io.OutputStream) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) RequestWriter(org.apache.solr.client.solrj.request.RequestWriter) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) XMLLoader(org.apache.solr.handler.loader.XMLLoader) Test(org.junit.Test)

Example 50 with XMLStreamReader

use of javax.xml.stream.XMLStreamReader in project lucene-solr by apache.

the class DocumentAnalysisRequestHandler method resolveAnalysisRequest.

//================================================ Helper Methods ==================================================
/**
 * Resolves the {@link DocumentAnalysisRequest} from the given solr request.
 * <p>
 * The query is taken from the analysis query parameter, falling back to the
 * common {@code q} parameter. The single content stream of the request is
 * parsed as XML and every {@code doc} element found is added to the result
 * as a document.
 *
 * @param req The solr request.
 *
 * @return The resolved document analysis request.
 *
 * @throws IOException        Thrown when reading/parsing the content stream of the request fails.
 * @throws XMLStreamException Thrown when reading/parsing the content stream of the request fails.
 */
DocumentAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws IOException, XMLStreamException {
    DocumentAnalysisRequest request = new DocumentAnalysisRequest();
    SolrParams params = req.getParams();
    String query = params.get(AnalysisParams.QUERY, params.get(CommonParams.Q, null));
    request.setQuery(query);
    boolean showMatch = params.getBool(AnalysisParams.SHOW_MATCH, false);
    request.setShowMatch(showMatch);
    ContentStream stream = extractSingleContentStream(req);
    InputStream is = null;
    XMLStreamReader parser = null;
    try {
        is = stream.getStream();
        final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
        parser = (charset == null) ? inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
        while (true) {
            int event = parser.next();
            switch(event) {
                case XMLStreamConstants.END_DOCUMENT:
                    {
                        // the parser is closed by the finally block below
                        return request;
                    }
                case XMLStreamConstants.START_ELEMENT:
                    {
                        String currTag = parser.getLocalName();
                        if ("doc".equals(currTag)) {
                            log.trace("Reading doc...");
                            SolrInputDocument document = readDocument(parser, req.getSchema());
                            request.addDocument(document);
                        }
                        break;
                    }
            }
        }
    } finally {
        // Nested so that the input stream is released even when closing the parser throws.
        try {
            if (parser != null) {
                parser.close();
            }
        } finally {
            IOUtils.closeQuietly(is);
        }
    }
}
Also used : ContentStream(org.apache.solr.common.util.ContentStream) SolrInputDocument(org.apache.solr.common.SolrInputDocument) XMLStreamReader(javax.xml.stream.XMLStreamReader) InputStream(java.io.InputStream) SolrParams(org.apache.solr.common.params.SolrParams) DocumentAnalysisRequest(org.apache.solr.client.solrj.request.DocumentAnalysisRequest)

Aggregations

XMLStreamReader (javax.xml.stream.XMLStreamReader): 243, XMLInputFactory (javax.xml.stream.XMLInputFactory): 98, StringReader (java.io.StringReader): 85, XMLStreamException (javax.xml.stream.XMLStreamException): 78, InputStream (java.io.InputStream): 61, IOException (java.io.IOException): 43, OMElement (org.apache.axiom.om.OMElement): 37, ByteArrayInputStream (java.io.ByteArrayInputStream): 27, Test (org.junit.Test): 25, JAXBException (javax.xml.bind.JAXBException): 16, QName (javax.xml.namespace.QName): 16, StAXSource (javax.xml.transform.stax.StAXSource): 16, StreamSource (javax.xml.transform.stream.StreamSource): 16, FileInputStream (java.io.FileInputStream): 14, OMFactory (org.apache.axiom.om.OMFactory): 14, Unmarshaller (javax.xml.bind.Unmarshaller): 13, InputStreamReader (java.io.InputStreamReader): 12, DeploymentUnitProcessingException (org.jboss.as.server.deployment.DeploymentUnitProcessingException): 12, Source (javax.xml.transform.Source): 11, InputSource (org.xml.sax.InputSource): 11