Search in sources:

Example 1 with DOMParser

use of org.cyberneko.html.parsers.DOMParser in project intellij-community by JetBrains.

the class FindJarFix method findJarsForFqn.

/**
 * Looks up JAR candidates for the class {@code fqn} and offers them for download.
 *
 * <p>The lookup page ({@code CLASS_ROOT_URL + fqn-as-path + CLASS_PAGE_EXT}) is parsed under a
 * modal progress task. Every {@code LINK_TAG_NAME} element whose {@code LINK_ATTR_NAME}
 * attribute starts with {@code "/jar/"} or {@code "/class/../"} is recorded as
 * "element text -> {@code SERVICE_URL} + attribute value". When the task succeeds:
 * <ul>
 *   <li>no match: an information hint is shown in the editor;</li>
 *   <li>exactly one match: its download is initiated immediately;</li>
 *   <li>several matches: a popup listing the names (sorted) lets the user pick one.</li>
 * </ul>
 *
 * @param fqn    fully qualified class name to search for
 * @param editor editor that supplies the project and anchors the hint/popup
 */
private void findJarsForFqn(final String fqn, final Editor editor) {
    final Map<String, String> jarUrlsByName = new HashMap<>();
    final Runnable lookup = () -> {
        try {
            final DOMParser htmlParser = new DOMParser();
            htmlParser.parse(CLASS_ROOT_URL + fqn.replace('.', '/') + CLASS_PAGE_EXT);
            final Document page = htmlParser.getDocument();
            if (page == null) {
                return;
            }
            final NodeList anchors = page.getElementsByTagName(LINK_TAG_NAME);
            for (int idx = 0; idx < anchors.getLength(); idx++) {
                final Node anchor = anchors.item(idx);
                final String jarName = anchor.getTextContent();
                final NamedNodeMap attrs = anchor.getAttributes();
                if (attrs == null) {
                    continue;
                }
                final Node hrefAttr = attrs.getNamedItem(LINK_ATTR_NAME);
                if (hrefAttr == null) {
                    continue;
                }
                final String jarPath = hrefAttr.getTextContent();
                if (jarPath == null) {
                    continue;
                }
                // Only links pointing at a JAR page (directly or via a class page) are candidates.
                if (jarPath.startsWith("/jar/") || jarPath.startsWith("/class/../")) {
                    jarUrlsByName.put(jarName, SERVICE_URL + jarPath);
                }
            }
        } catch (IOException ignore) {
            // I/O failures are not logged; whatever was collected so far is kept.
        } catch (Exception e) {
            LOG.warn(e);
        }
    };
    final Task.Modal task = new Task.Modal(editor.getProject(), "Looking for libraries", true) {

        @Override
        public void run(@NotNull ProgressIndicator indicator) {
            indicator.setIndeterminate(true);
            runUncanceledRunnableWithProgress(lookup, indicator);
        }

        @Override
        public void onSuccess() {
            super.onSuccess();
            if (jarUrlsByName.isEmpty()) {
                HintManager.getInstance().showInformationHint(editor, "No libraries found for '" + fqn + "'");
                return;
            }
            final ArrayList<String> sortedNames = new ArrayList<>(jarUrlsByName.keySet());
            sortedNames.sort(String::compareTo);
            final JBList nameList = new JBList(sortedNames);
            nameList.installCellRenderer(o -> new JLabel(o.toString(), PlatformIcons.JAR_ICON, SwingConstants.LEFT));
            if (jarUrlsByName.size() == 1) {
                // A single candidate needs no chooser.
                final Map.Entry<String, String> only = jarUrlsByName.entrySet().iterator().next();
                initiateDownload(only.getValue(), only.getKey());
                return;
            }
            JBPopupFactory.getInstance().createListPopupBuilder(nameList).setTitle("Select a JAR file").setItemChoosenCallback(() -> {
                final Object selected = nameList.getSelectedValue();
                if (!(selected instanceof String)) {
                    return;
                }
                final String chosenName = (String) selected;
                final String chosenUrl = jarUrlsByName.get(chosenName);
                if (chosenUrl != null) {
                    initiateDownload(chosenUrl, chosenName);
                }
            }).createPopup().showInBestPositionFor(editor);
        }
    };
    ProgressManager.getInstance().run(task);
}
Also used : Task(com.intellij.openapi.progress.Task) NamedNodeMap(org.w3c.dom.NamedNodeMap) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) IOException(java.io.IOException) Document(org.w3c.dom.Document) NotNull(org.jetbrains.annotations.NotNull) IncorrectOperationException(com.intellij.util.IncorrectOperationException) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException) ProgressIndicator(com.intellij.openapi.progress.ProgressIndicator) JBList(com.intellij.ui.components.JBList) DOMParser(org.cyberneko.html.parsers.DOMParser)

Example 2 with DOMParser

use of org.cyberneko.html.parsers.DOMParser in project OpenOLAT by OpenOLAT.

the class QuoteAndTagFilter method filter.

/**
 * Parses {@code original} with a {@link DOMParser} and returns the text that
 * {@code scanNode} appends while processing the resulting document.
 *
 * @param original the markup to filter
 * @return the text collected by {@code scanNode}, or {@code null} if parsing fails
 *         (the failure is logged)
 * @see org.olat.core.util.filter.Filter#filter(java.lang.String)
 */
@Override
public String filter(String original) {
    final StringBuilder collected = new StringBuilder();
    try {
        final DOMParser htmlParser = new DOMParser();
        final InputSource source = new InputSource(new StringReader(original));
        htmlParser.parse(source);
        final Document parsed = htmlParser.getDocument();
        scanNode(parsed, collected);
    } catch (SAXException | IOException e) {
        // Both failure modes are handled identically: log and signal failure with null.
        log.error("", e);
        return null;
    }
    return collected.toString();
}
Also used : InputSource(org.xml.sax.InputSource) StringReader(java.io.StringReader) DOMParser(org.cyberneko.html.parsers.DOMParser) IOException(java.io.IOException) Document(org.w3c.dom.Document) SAXException(org.xml.sax.SAXException)

Example 3 with DOMParser

use of org.cyberneko.html.parsers.DOMParser in project muikku by otavanopisto.

the class MaterialFieldCollection method parseFields.

/**
 * Rebuilds {@code materialFields} from the field definitions embedded in {@code html}.
 *
 * <p>Existing entries are always cleared first. If {@code html} is not blank it is parsed with a
 * {@link DOMParser}; for every {@code <object>} element accepted by {@code isMuikkuField}, the
 * {@code value} of its first {@code <param name="content">} is read. When that value is not
 * blank it is deserialized into a {@link FieldMeta} (unknown JSON properties are ignored) and a
 * {@link MaterialField} is stored under the meta's name, together with the object's
 * {@code type} attribute and the raw content.
 *
 * @param html the material HTML; {@code null} or blank input only clears the collection
 * @throws IllegalArgumentException if the HTML or any field's metadata cannot be parsed; the
 *         underlying exception is attached as the cause
 */
public void parseFields(String html) {
    if (!materialFields.isEmpty()) {
        materialFields.clear();
    }
    if (StringUtils.isNotBlank(html)) {
        StringReader htmlReader = new StringReader(html);
        try {
            DOMParser parser = new DOMParser();
            InputSource inputSource = new InputSource(htmlReader);
            parser.parse(inputSource);
            Document document = parser.getDocument();
            // Created on first use and then shared by every field in this document,
            // instead of building a new mapper per field.
            ObjectMapper objectMapper = null;
            NodeList objectNodeList = document.getElementsByTagName("object");
            for (int i = 0, l = objectNodeList.getLength(); i < l; i++) {
                Node objectNode = objectNodeList.item(i);
                if (objectNode instanceof Element) {
                    Element objectElement = (Element) objectNode;
                    if (isMuikkuField(objectElement)) {
                        String fieldType = objectElement.getAttribute("type");
                        NodeList paramNodes = objectElement.getElementsByTagName("param");
                        String content = null;
                        // Only the first <param name="content"> is considered.
                        for (int j = 0, jl = paramNodes.getLength(); j < jl; j++) {
                            Node paramNode = paramNodes.item(j);
                            if (paramNode instanceof Element) {
                                Element paramElement = (Element) paramNode;
                                if ("content".equals(paramElement.getAttribute("name"))) {
                                    content = paramElement.getAttribute("value");
                                    break;
                                }
                            }
                        }
                        if (StringUtils.isNotBlank(content)) {
                            if (objectMapper == null) {
                                objectMapper = new ObjectMapper();
                                objectMapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
                            }
                            FieldMeta fieldMeta = objectMapper.readValue(content, FieldMeta.class);
                            materialFields.put(fieldMeta.getName(), new MaterialField(fieldMeta.getName(), fieldType, content));
                        }
                    }
                }
            }
        } catch (Exception e) {
            // TODO Proper exception handling
            // Keep the original exception as the cause so the real failure stays diagnosable.
            throw new IllegalArgumentException("Malformed document structure: " + html, e);
        } finally {
            htmlReader.close();
        }
    }
}
Also used : InputSource(org.xml.sax.InputSource) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) FieldMeta(fi.otavanopisto.muikku.plugins.material.fieldmeta.FieldMeta) Document(org.w3c.dom.Document) StringReader(java.io.StringReader) DOMParser(org.cyberneko.html.parsers.DOMParser) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 4 with DOMParser

use of org.cyberneko.html.parsers.DOMParser in project nimbus by nimbus-org.

the class DataSetHtmlConverter method parseXml.

/**
 * Parses {@code inputStream} into a DOM {@link Document} with a new {@link DOMParser}.
 *
 * <p>If {@code characterEncodingToObject} is set, it is applied to the input source as its
 * encoding. When {@code isSynchronizedDomParse} is true, the parse call runs while holding the
 * monitor of the parser's class object.
 *
 * @param inputStream stream to parse
 * @return the document produced by the parser
 * @throws ConvertException if the parser reports a {@link SAXException} or {@link IOException}
 */
protected Document parseXml(InputStream inputStream) throws ConvertException {
    final InputSource source = new InputSource(inputStream);
    if (characterEncodingToObject != null) {
        source.setEncoding(characterEncodingToObject);
    }
    final DOMParser domParser = new DOMParser();
    try {
        if (!isSynchronizedDomParse) {
            domParser.parse(source);
        } else {
            // The class object is the lock, so all callers taking this branch share one monitor.
            synchronized (domParser.getClass()) {
                domParser.parse(source);
            }
        }
    } catch (IOException e) {
        throw new ConvertException("Failed to parse a stream.", e);
    } catch (SAXException e) {
        throw new ConvertException("Failed to parse a stream.", e);
    }
    return domParser.getDocument();
}
Also used : InputSource(org.xml.sax.InputSource) DOMParser(org.cyberneko.html.parsers.DOMParser) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException)

Example 5 with DOMParser

use of org.cyberneko.html.parsers.DOMParser in project fess-crawler by codelibs.

the class HtmlTransformer method storeChildUrls.

/**
 * Collects child URLs from the response body and records them in {@code resultData}.
 *
 * <p>The body is parsed with the parser from {@code getDomParser()}. For every entry of
 * {@code childUrlRuleMap}, the URLs returned by {@code getUrlFromTagAttribute} are turned into
 * GET requests. The resulting list is passed through {@code convertChildUrlList} and added to
 * {@code resultData}, followed by the response's own child URL set. Finally the request that
 * produced this response, and the value of {@code getDuplicateUrl} for it, are removed.
 *
 * @param responseData response whose body is parsed; the body stream is closed on exit
 * @param resultData   receives the discovered child URLs
 * @throws CrawlerSystemException rethrown as is, or wrapping any other exception raised here
 */
protected void storeChildUrls(final ResponseData responseData, final ResultData resultData) {
    try (final InputStream bodyStream = responseData.getResponseBody()) {
        final DOMParser domParser = getDomParser();
        domParser.parse(new InputSource(bodyStream));
        final Document doc = domParser.getDocument();

        // Use the value from getBaseHref when present; fall back to the response URL
        // when it is null or not a valid URL.
        final String baseHref = getBaseHref(doc);
        URL baseUrl;
        try {
            if (baseHref != null) {
                baseUrl = new URL(baseHref);
            } else {
                baseUrl = new URL(responseData.getUrl());
            }
        } catch (final MalformedURLException e) {
            baseUrl = new URL(responseData.getUrl());
        }

        final List<RequestData> childRequests = new ArrayList<>();
        for (final Map.Entry<String, String> rule : childUrlRuleMap.entrySet()) {
            for (final String childUrl : getUrlFromTagAttribute(baseUrl, doc, rule.getKey(), rule.getValue(), responseData.getCharSet())) {
                final RequestData childRequest = RequestDataBuilder.newRequestData().get().url(childUrl).build();
                childRequests.add(childRequest);
            }
        }

        final List<RequestData> converted = convertChildUrlList(childRequests);
        resultData.addAllUrl(converted);
        resultData.addAllUrl(responseData.getChildUrlSet());

        // Drop the request that produced this response, and its duplicate form.
        final RequestData ownRequest = responseData.getRequestData();
        resultData.removeUrl(ownRequest);
        resultData.removeUrl(getDuplicateUrl(ownRequest));
    } catch (final CrawlerSystemException e) {
        throw e;
    } catch (final Exception e) {
        throw new CrawlerSystemException("Could not store data.", e);
    }
}
Also used : InputSource(org.xml.sax.InputSource) MalformedURLException(java.net.MalformedURLException) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Document(org.w3c.dom.Document) URL(java.net.URL) CrawlingAccessException(org.codelibs.fess.crawler.exception.CrawlingAccessException) TransformerException(javax.xml.transform.TransformerException) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) RequestData(org.codelibs.fess.crawler.entity.RequestData) CrawlerSystemException(org.codelibs.fess.crawler.exception.CrawlerSystemException) DOMParser(org.cyberneko.html.parsers.DOMParser) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Aggregations

DOMParser (org.cyberneko.html.parsers.DOMParser): 12, Document (org.w3c.dom.Document): 10, IOException (java.io.IOException): 8, InputSource (org.xml.sax.InputSource): 8, SAXException (org.xml.sax.SAXException): 7, Node (org.w3c.dom.Node): 6, NodeList (org.w3c.dom.NodeList): 5, StringReader (java.io.StringReader): 4, ArrayList (java.util.ArrayList): 3, CrawlerSystemException (org.codelibs.fess.crawler.exception.CrawlerSystemException): 3, BufferedInputStream (java.io.BufferedInputStream): 2, InputStream (java.io.InputStream): 2, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 2, URL (java.net.URL): 2, LinkedHashMap (java.util.LinkedHashMap): 2, Map (java.util.Map): 2, TransformerException (javax.xml.transform.TransformerException): 2, CrawlingAccessException (org.codelibs.fess.crawler.exception.CrawlingAccessException): 2, NamedNodeMap (org.w3c.dom.NamedNodeMap): 2, ProgressIndicator (com.intellij.openapi.progress.ProgressIndicator): 1