Search in sources :

Example 1 with UserAgentContext

use of org.loboevolution.http.UserAgentContext in project LoboEvolution by LoboEvolution.

the class XMLContentHandler method createDocument.

/**
 * Creates a new document whose document type and root qualified name are
 * both the literal "HTML".
 *
 * The DOM implementation is backed by a fresh {@link UserAgentContext} on
 * which {@code setUserAgentEnabled(false)} has been called.
 *
 * @param uri   namespace URI handed to the DOM implementation
 * @param qName not read by this method; "HTML" is always used instead
 * @return the document produced by the DOM implementation
 */
private Document createDocument(String uri, String qName) {
    final UserAgentContext agentContext = new UserAgentContext();
    agentContext.setUserAgentEnabled(false);

    final DOMImplementationImpl implementation = new DOMImplementationImpl(agentContext);
    final DocumentType htmlDoctype = implementation.createDocumentType("HTML", null, null);

    return implementation.createDocument(uri, "HTML", htmlDoctype);
}
Also used : UserAgentContext(org.loboevolution.http.UserAgentContext) DOMImplementationImpl(org.loboevolution.html.dom.domimpl.DOMImplementationImpl)

Example 2 with UserAgentContext

use of org.loboevolution.http.UserAgentContext in project LoboEvolution by LoboEvolution.

the class HtmlParser method parseToken.

/**
 * Parses text followed by one element.
 *
 * Any text found before the next tag is entity-decoded and appended to
 * {@code parent} as a text node. The tag that follows (if any) is then
 * handled according to its first character: comments and DOCTYPE
 * declarations ({@code !}), end tags ({@code /}), processing instructions
 * ({@code ?}) or a regular element, whose children are parsed recursively.
 * On every path that reads a tag, {@code normalLastTag} is updated to the
 * upper-cased tag name when the method exits; when no tag was read it is
 * reset to {@code null}.
 *
 * @param parent      node to which the parsed text, comment, processing
 *                    instruction or element is appended
 * @param reader      reader the markup is consumed from
 * @param stopTags    If tags in this set are encountered, the method throws
 *                    StopException. May be {@code null}.
 * @param ancestors   upper-cased tag names of the elements currently being
 *                    parsed, innermost first; consulted when a mismatched
 *                    end tag is seen to decide whether it closes an ancestor
 * @return one of {@code TOKEN_EOD}, {@code TOKEN_TEXT}, {@code TOKEN_COMMENT},
 *         {@code TOKEN_BAD}, {@code TOKEN_END_ELEMENT},
 *         {@code TOKEN_BEGIN_ELEMENT} or {@code TOKEN_FULL_ELEMENT}
 * @throws IOException   declared for the reader-based helpers invoked here
 * @throws StopException if the element just read is in {@code stopTags}; the
 *                       exception carries the new element, which has not
 *                       been appended to {@code parent}
 * @throws SAXException  declared by this method; not thrown directly in
 *                       this body (presumably raised by helpers)
 */
private int parseToken(final Node parent, final LineNumberReader reader, final Set<HTMLTag> stopTags, final LinkedList<String> ancestors) throws IOException, StopException, SAXException {
    final Document doc = this.document;
    final HTMLDocumentImpl htmlDoc = (HTMLDocumentImpl) doc;
    final StringBuilder textSb = this.readUpToTagBegin(reader);
    if (textSb == null) {
        return TOKEN_EOD;
    }
    if (textSb.length() != 0) {
        // int textLine = reader.getLineNumber();
        final StringBuilder decText = entityDecode(textSb);
        final Node textNode = doc.createTextNode(decText.toString());
        try {
            safeAppendChild(parent, textNode);
        } catch (final DOMException de) {
            // A hierarchy error while appending text directly to the document
            // node is ignored silently; any other failure is logged.
            if ((parent.getNodeType() != NodeType.DOCUMENT_NODE) || (de.getCode() != DOMException.HIERARCHY_REQUEST_ERR)) {
                logger.log(Level.WARNING, "parseToken(): Unable to append child to " + parent + ".", de);
            }
        }
    }
    if (this.justReadTagBegin) {
        String tag = this.readTag(parent, reader);
        if (tag == null) {
            return TOKEN_EOD;
        }
        String normalTag = tag.toUpperCase();
        try {
            if (tag.startsWith("!")) {
                if ("!--".equals(tag)) {
                    final StringBuilder comment = this.passEndOfComment(reader);
                    final StringBuilder decText = entityDecode(comment);
                    safeAppendChild(parent, doc.createComment(decText.toString()));
                    return TOKEN_COMMENT;
                } else if ("!DOCTYPE".equals(tag)) {
                    final String doctypeStr = this.parseEndOfTag(reader);
                    String qName = null;
                    String publicId = null;
                    String systemId = null;
                    // With a PUBLIC identifier the three parts come from the
                    // regex groups (all stay null if the pattern does not
                    // match); otherwise the remaining text minus '>' is the name.
                    if (doctypeStr.contains("PUBLIC")) {
                        final Matcher doctypeMatcher = doctypePattern.matcher(doctypeStr);
                        if (doctypeMatcher.matches()) {
                            qName = doctypeMatcher.group(1);
                            publicId = doctypeMatcher.group(2);
                            systemId = doctypeMatcher.group(3);
                        }
                    } else {
                        qName = doctypeStr.replace(">", "");
                    }
                    final DocumentTypeImpl doctype = new DocumentTypeImpl(qName, publicId, systemId);
                    htmlDoc.setDoctype(doctype);
                    needRoot = false;
                    return TOKEN_BAD;
                } else {
                    passEndOfTag(reader);
                    return TOKEN_BAD;
                }
            } else if (tag.startsWith("/")) {
                // End tag: strip the slash so normalLastTag (set in the outer
                // finally) holds the bare upper-cased name for the caller.
                tag = tag.substring(1);
                normalTag = normalTag.substring(1);
                this.passEndOfTag(reader);
                return TOKEN_END_ELEMENT;
            } else if (tag.startsWith("?")) {
                tag = tag.substring(1);
                final StringBuilder data = readProcessingInstruction(reader);
                safeAppendChild(parent, doc.createProcessingInstruction(tag, data.toString()));
                return TOKEN_FULL_ELEMENT;
            } else {
                // Regular start tag. The element is created from the part of
                // the name after any "prefix:".
                final int localIndex = normalTag.indexOf(':');
                final boolean tagHasPrefix = localIndex > 0;
                final String localName = tagHasPrefix ? normalTag.substring(localIndex + 1) : normalTag;
                Element element = doc.createElement(localName);
                element.setUserData(MODIFYING_KEY, Boolean.TRUE, null);
                try {
                    if (!this.justReadTagEnd) {
                        while (this.readAttribute(reader, element)) {
                        // EMPTY LOOP
                        }
                    }
                    if (stopTags != null && stopTags.contains(HTMLTag.get(normalTag))) {
                        // After MODIFYING_KEY is set.
                        throw new StopException(element);
                    }
                    // Add element to parent before children are added.
                    // This is necessary for incremental rendering.
                    safeAppendChild(parent, element);
                    if (!this.justReadEmptyElement) {
                        ElementInfo einfo = HTMLEntities.ELEMENT_INFOS.get(HTMLTag.get(localName.toUpperCase()));
                        int endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.getEndElementType();
                        if (endTagType != ElementInfo.END_ELEMENT_FORBIDDEN) {
                            boolean childrenOk = einfo == null || einfo.isChildElementOk();
                            Set<HTMLTag> newStopSet = einfo == null ? null : einfo.getStopTags();
                            if (newStopSet == null) {
                                if (endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
                                    newStopSet = Collections.singleton(HTMLTag.get(normalTag));
                                }
                            }
                            // Combine the caller's stop tags with this element's
                            // own; an element with a required end tag and no stop
                            // tags of its own does not inherit the caller's set.
                            if (stopTags != null) {
                                if (newStopSet != null) {
                                    final Set<HTMLTag> newStopSet2 = new HashSet<>();
                                    newStopSet2.addAll(stopTags);
                                    newStopSet2.addAll(newStopSet);
                                    newStopSet = newStopSet2;
                                } else {
                                    newStopSet = endTagType == ElementInfo.END_ELEMENT_REQUIRED ? null : stopTags;
                                }
                            }
                            ancestors.addFirst(normalTag);
                            try {
                                // Keep consuming child tokens until an end tag,
                                // end of data or a rethrown StopException exits.
                                for (; ; ) {
                                    try {
                                        int token;
                                        if ((einfo != null) && einfo.isNoScriptElement()) {
                                            // noscript: with no context or scripting
                                            // enabled, read up to the end tag via
                                            // parseForEndTag; otherwise parse the
                                            // content as ordinary child markup.
                                            final UserAgentContext ucontext = this.ucontext;
                                            if ((ucontext == null) || ucontext.isScriptingEnabled()) {
                                                token = this.parseForEndTag(parent, reader, tag, false, shouldDecodeEntities(einfo));
                                            } else {
                                                token = this.parseToken(element, reader, newStopSet, ancestors);
                                            }
                                        } else {
                                            token = childrenOk ? this.parseToken(element, reader, newStopSet, ancestors) : this.parseForEndTag(element, reader, tag, true, shouldDecodeEntities(einfo));
                                        }
                                        if (token == TOKEN_END_ELEMENT) {
                                            final String normalLastTag = this.normalLastTag;
                                            if (normalTag.equalsIgnoreCase(normalLastTag)) {
                                                return TOKEN_FULL_ELEMENT;
                                            } else {
                                                final ElementInfo closeTagInfo = HTMLEntities.ELEMENT_INFOS.get(HTMLTag.get(normalLastTag.toUpperCase()));
                                                if ((closeTagInfo == null) || (closeTagInfo.getEndElementType() != ElementInfo.END_ELEMENT_FORBIDDEN)) {
                                                    // TODO: Rather inefficient algorithm, but it's
                                                    // probably executed infrequently?
                                                    final Iterator<String> i = ancestors.iterator();
                                                    if (i.hasNext()) {
                                                        // Skip the first entry: it is this
                                                        // element's own tag, already compared.
                                                        i.next();
                                                        while (i.hasNext()) {
                                                            final String normalAncestorTag = i.next();
                                                            if (normalLastTag.equals(normalAncestorTag)) {
                                                                // The end tag closes an ancestor:
                                                                // propagate it upward unchanged.
                                                                normalTag = normalLastTag;
                                                                return TOKEN_END_ELEMENT;
                                                            }
                                                        }
                                                    }
                                                }
                                            // TODO: Working here
                                            }
                                        } else if (token == TOKEN_EOD) {
                                            return TOKEN_EOD;
                                        }
                                    } catch (final StopException se) {
                                        // newElement does not have a parent.
                                        final Element newElement = se.getElement();
                                        tag = newElement.getTagName();
                                        normalTag = tag.toUpperCase();
                                        // If the caller also stops on this tag, let
                                        // it handle the element instead.
                                        if (stopTags != null && stopTags.contains(HTMLTag.get(normalTag))) {
                                            throw se;
                                        }
                                        // Recompute the parsing parameters for the
                                        // element that replaces the current one.
                                        einfo = HTMLEntities.ELEMENT_INFOS.get(HTMLTag.get(normalTag.toUpperCase()));
                                        endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.getEndElementType();
                                        childrenOk = einfo == null || einfo.isChildElementOk();
                                        newStopSet = einfo == null ? null : einfo.getStopTags();
                                        if (newStopSet == null) {
                                            if (endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
                                                newStopSet = Collections.singleton(HTMLTag.get(normalTag));
                                            }
                                        }
                                        if (stopTags != null && newStopSet != null) {
                                            final Set<HTMLTag> newStopSet2 = new HashSet<>();
                                            newStopSet2.addAll(stopTags);
                                            newStopSet2.addAll(newStopSet);
                                            newStopSet = newStopSet2;
                                        }
                                        ancestors.removeFirst();
                                        ancestors.addFirst(normalTag);
                                        // Switch element
                                        element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
                                        // newElement should have been suspended.
                                        element = newElement;
                                        // Add to parent
                                        safeAppendChild(parent, element);
                                        if (this.justReadEmptyElement) {
                                            return TOKEN_BEGIN_ELEMENT;
                                        }
                                    }
                                }
                            } finally {
                                ancestors.removeFirst();
                            }
                        }
                    }
                    return TOKEN_BEGIN_ELEMENT;
                } finally {
                    // This can inform elements to continue with notifications.
                    // It can also cause Javascript to be loaded / processed.
                    // Update: Elements now use Document.addJob() to delay processing
                    element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
                }
            }
        } finally {
            // Record the tag handled by this call so the caller can compare
            // it against its own tag after a TOKEN_END_ELEMENT.
            this.normalLastTag = normalTag;
        }
    } else {
        this.normalLastTag = null;
        return TOKEN_TEXT;
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) UserAgentContext(org.loboevolution.http.UserAgentContext) Matcher(java.util.regex.Matcher) ElementInfo(org.loboevolution.info.ElementInfo) Node(org.loboevolution.html.node.Node) Element(org.loboevolution.html.node.Element) DocumentTypeImpl(org.loboevolution.html.dom.domimpl.DocumentTypeImpl) Document(org.loboevolution.html.node.Document) HTMLDocumentImpl(org.loboevolution.html.dom.domimpl.HTMLDocumentImpl) DOMException(com.gargoylesoftware.css.dom.DOMException) HTMLTag(org.loboevolution.html.HTMLTag)

Example 3 with UserAgentContext

use of org.loboevolution.http.UserAgentContext in project LoboEvolution by LoboEvolution.

the class DOMNodeTest method setUpBeforeClass.

/**
 * One-time fixture setup: installs a DOM implementation built on a
 * {@link UserAgentContext} with the user agent disabled, then loads the
 * sample HTML document used by the tests.
 */
@BeforeClass
public static void setUpBeforeClass() {
    final UserAgentContext agentContext = new UserAgentContext(true);
    agentContext.setUserAgentEnabled(false);

    // The implementation is assigned before the sample document is loaded,
    // preserving the original initialization order.
    impl = new DOMImplementationImpl(agentContext);
    document = sampleHtmlFile();
}
Also used : UserAgentContext(org.loboevolution.http.UserAgentContext) DOMImplementationImpl(org.loboevolution.html.dom.domimpl.DOMImplementationImpl) BeforeClass(org.junit.BeforeClass)

Example 4 with UserAgentContext

use of org.loboevolution.http.UserAgentContext in project LoboEvolution by LoboEvolution.

the class DOMDocumentTest method setUpBeforeClass.

/**
 * One-time fixture setup: creates the shared DOM implementation on top of
 * a {@link UserAgentContext} whose user agent has been disabled.
 */
@BeforeClass
public static void setUpBeforeClass() {
    final UserAgentContext agentContext = new UserAgentContext(true);
    agentContext.setUserAgentEnabled(false);

    domImpl = new DOMImplementationImpl(agentContext);
}
Also used : UserAgentContext(org.loboevolution.http.UserAgentContext) DOMImplementationImpl(org.loboevolution.html.dom.domimpl.DOMImplementationImpl) BeforeClass(org.junit.BeforeClass)

Example 5 with UserAgentContext

use of org.loboevolution.http.UserAgentContext in project LoboEvolution by LoboEvolution.

the class HTMLDocumentTest method testAppendChildTwoDoctypesError.

/**
 * Appending a second document type to a document that already has one
 * must fail with {@code HIERARCHY_REQUEST_ERR}.
 */
@Test
public void testAppendChildTwoDoctypesError() throws DOMException {
    final UserAgentContext agentContext = new UserAgentContext(true);
    agentContext.setUserAgentEnabled(false);
    final Document doc = new DOMImplementationImpl(agentContext).createDocument(null, null, null);

    // The first doctype is accepted.
    doc.appendChild(doc.getImplementation().createDocumentType("foo", null, null));

    // The second one has to be rejected.
    try {
        doc.appendChild(doc.getImplementation().createDocumentType("bar", null, null));
        fail("Must throw exception.");
    } catch (DOMException expected) {
        assertEquals(DOMException.HIERARCHY_REQUEST_ERR, expected.getCode());
    }
}
Also used : DOMException(com.gargoylesoftware.css.dom.DOMException) UserAgentContext(org.loboevolution.http.UserAgentContext) HTMLDocument(org.loboevolution.html.dom.HTMLDocument) Test(org.junit.Test) LoboUnitTest(org.loboevolution.driver.LoboUnitTest)

Aggregations

UserAgentContext (org.loboevolution.http.UserAgentContext)13 HtmlRendererContext (org.loboevolution.http.HtmlRendererContext)5 DOMImplementationImpl (org.loboevolution.html.dom.domimpl.DOMImplementationImpl)4 HtmlPanel (org.loboevolution.html.gui.HtmlPanel)4 Document (org.loboevolution.html.node.Document)4 BeforeClass (org.junit.BeforeClass)3 HTMLDocumentImpl (org.loboevolution.html.dom.domimpl.HTMLDocumentImpl)3 Context (org.mozilla.javascript.Context)3 DOMException (com.gargoylesoftware.css.dom.DOMException)2 InputStreamReader (java.io.InputStreamReader)2 SocketTimeoutException (java.net.SocketTimeoutException)2 URL (java.net.URL)2 Instant (java.time.Instant)2 WritableLineReader (org.loboevolution.html.io.WritableLineReader)2 TimingInfo (org.loboevolution.info.TimingInfo)2 RhinoException (org.mozilla.javascript.RhinoException)2 Scriptable (org.mozilla.javascript.Scriptable)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 InputStream (java.io.InputStream)1