Examples with Element - net.htmlparser.jericho.Element

Example 26 with Element

Use of net.htmlparser.jericho.Element in the project zaproxy, by zaproxy.

In the class ExtensionAntiCSRF, method getTokensFromResponse:

/**
 * Collects the {@link #getAntiCsrfTokenNames() known} anti-csrf tokens present in the forms of
 * the given response.
 *
 * <p>Every {@code INPUT} element of every {@code FORM} element is inspected. Inputs without a
 * {@code VALUE} attribute are skipped. An input is reported once at most: its {@code ID} is
 * checked first and its {@code NAME} is only consulted when the {@code ID} is not a known token
 * name. Each token records the zero-based index of the form it was found in.
 *
 * @param msg the message the tokens belong to.
 * @param source the HTML source document of the response.
 * @return the extracted anti-csrf tokens, empty if none was found.
 * @since 2.2.0
 */
public List<AntiCsrfToken> getTokensFromResponse(HttpMessage msg, Source source) {
    List<AntiCsrfToken> tokens = new ArrayList<>();
    List<Element> forms = source.getAllElements(HTMLElementName.FORM);
    if (forms == null || forms.isEmpty()) {
        // Nothing to inspect
        return tokens;
    }

    log.debug("Found " + forms.size() + " forms");
    int formIndex = 0;
    for (Element form : forms) {
        List<Element> inputs = form.getAllElements(HTMLElementName.INPUT);
        if (inputs != null && !inputs.isEmpty()) {
            log.debug("Found " + inputs.size() + " inputs");
            for (Element input : inputs) {
                String tokenValue = input.getAttributeValue("VALUE");
                if (tokenValue == null) {
                    continue;
                }

                // The ID takes precedence over the NAME
                String id = input.getAttributeValue("ID");
                if (isKnownAntiCsrfToken(id)) {
                    tokens.add(new AntiCsrfToken(msg, id, tokenValue, formIndex));
                    continue;
                }

                String name = input.getAttributeValue("NAME");
                if (isKnownAntiCsrfToken(name)) {
                    tokens.add(new AntiCsrfToken(msg, name, tokenValue, formIndex));
                }
            }
        }
        // Forms without inputs still count towards the index
        formIndex++;
    }
    return tokens;
}

Also used : Element(net.htmlparser.jericho.Element) ArrayList(java.util.ArrayList)

Example 27 with Element

Use of net.htmlparser.jericho.Element in the project zaproxy, by zaproxy.

In the class HtmlContextAnalyser, method getHtmlContexts:

/**
 * Locates every occurrence of {@code target} in the page and describes the HTML context each
 * occurrence appears in.
 *
 * <p>For each occurrence the following is recorded on the resulting {@code HtmlContext}: the
 * surrounding quote character (if the occurrence sits between two matching quotes within the
 * same tag/text run), whether it appears to be inside an HTML comment, the attribute of the
 * enclosing element whose value contains the target (compared case-insensitively), and the
 * names of the enclosing element and all of its ancestors.
 *
 * @param target the string to search for, must not be {@code null}. An empty target yields an
 *     empty list.
 * @param targetContext if not {@code null}, only the contexts for which
 *     {@code targetContext.matches(context, ignoreFlags)} holds are returned; if {@code null}
 *     all contexts are returned.
 * @param ignoreFlags the flags handed to {@code targetContext.matches}.
 * @return the contexts found, in page order; never {@code null}.
 */
public List<HtmlContext> getHtmlContexts(String target, HtmlContext targetContext, int ignoreFlags) {
    List<HtmlContext> contexts = new ArrayList<>();
    if (target.isEmpty()) {
        // indexOf("") matches at every offset without ever advancing it, which would make the
        // search loop below spin forever.
        return contexts;
    }
    // Loop invariant, used for the case-insensitive attribute value comparison
    String lowerCaseTarget = target.toLowerCase();
    int offset = 0;
    while ((offset = htmlPage.indexOf(target, offset)) >= 0) {
        HtmlContext context = new HtmlContext(this.msg, target, offset, offset + target.length());
        offset += target.length();
        int start = context.getStart();
        int end = context.getEnd();

        // Is it in quotes? Walk left (down to and including the first character of the page)
        // until a quote or the end of another tag is hit.
        char leftQuote = 0;
        for (int i = start - 1; i >= 0; i--) {
            char chr = htmlPage.charAt(i);
            if (isQuote(chr)) {
                leftQuote = chr;
                break;
            } else if (chr == '>') {
                // end of another tag
                break;
            }
        }
        if (leftQuote != 0) {
            // Walk right looking for the matching quote
            for (int i = end; i < htmlPage.length(); i++) {
                char chr = htmlPage.charAt(i);
                if (leftQuote == chr) {
                    // matching quote
                    context.setSurroundingQuote("" + leftQuote);
                    break;
                } else if (isQuote(chr)) {
                    // Another non matching quote
                    break;
                } else if (chr == '<') {
                    // start of another tag
                    break;
                }
            }
        }

        // Is it in an HTML comment? Only the text before the occurrence is considered; the
        // bounded lastIndexOf calls give the same answer as searching a substring copy of that
        // text, without allocating the copy for every occurrence.
        int lastCommentOpen = htmlPage.lastIndexOf("<!--", start - "<!--".length());
        int lastTagClose = htmlPage.lastIndexOf(">", start - 1);
        if (lastCommentOpen > lastTagClose) {
            // Also check closing comment?
            context.setHtmlComment(true);
        }

        // Work out the location in the DOM
        Element element = src.getEnclosingElement(start);
        if (element != null) {
            // Special case for input src attributes
            boolean isInputTag = element.getName().equalsIgnoreCase("input");
            boolean isImageInputTag = false;
            // The enclosing element may be one whose start tag carries no attributes at all
            // (e.g. a comment), in which case there is no attribute list to inspect.
            List<Attribute> attributes = element.getAttributes();
            if (attributes != null) {
                // See if its in an attribute
                for (Attribute att : attributes) {
                    String attValue = att.getValue();
                    if (attValue != null && attValue.toLowerCase().indexOf(lowerCaseTarget) >= 0) {
                        // Found the injected value
                        context.setTagAttribute(att.getName());
                        context.setInUrlAttribute(this.isUrlAttribute(att.getName()));
                        context.setInScriptAttribute(this.isScriptAttribute(att.getName()));
                    }
                    if (isInputTag && att.getName().equalsIgnoreCase("type") && "image".equalsIgnoreCase(attValue)) {
                        isImageInputTag = true;
                    }
                }
            }
            // record the tag hierarchy
            context.addParentTag(element.getName());
            if (!isInputTag || isImageInputTag) {
                // Input tags only use the src attribute if the type is 'image'
                context.setInTagWithSrc(this.isInTagWithSrcAttribute(element.getName()));
            }
            for (Element parent = element.getParentElement(); parent != null; parent = parent.getParentElement()) {
                context.addParentTag(parent.getName());
            }
        }

        // Always add when no filter was supplied, otherwise only when it matches the filter
        if (targetContext == null || targetContext.matches(context, ignoreFlags)) {
            contexts.add(context);
        }
    }
    return contexts;
}

Also used : Attribute(net.htmlparser.jericho.Attribute) Element(net.htmlparser.jericho.Element) ArrayList(java.util.ArrayList)

Example 28 with Element

Use of net.htmlparser.jericho.Element in the project zaproxy, by zaproxy.

In the class SpiderHtmlParser, method parseResource:

/**
 * Parses the HTML response of the given message.
 *
 * <p>The base URL is the request URI unless the document has a {@code BASE} element with a
 * non-empty {@code href}, in which case the canonicalised {@code href} is used. The document is
 * then handed to {@code parseSource}. When comment parsing is enabled in the parameters, the
 * content of each HTML comment is parsed the same way; if that finds nothing, the comment text
 * is searched with {@code PLAIN_COMMENTS_URL_PATTERN} and each match is passed to
 * {@code processURL}.
 *
 * @param message the message whose response is parsed.
 * @param source the parsed response, or {@code null} to have it built from the response body.
 * @param depth the depth handed on to the URL processing.
 * @return always {@code false}.
 * @throws NullPointerException if {@code message} is null.
 */
@Override
public boolean parseResource(HttpMessage message, Source source, int depth) {
    // Build the document from the response body when the caller did not supply one
    Source document = source;
    if (document == null) {
        document = new Source(message.getResponseBody().toString());
    }

    // The request URI is the default context for relative URLs...
    String baseURL = message.getRequestHeader().getURI().toString();
    // ...but a BASE tag with a usable href overrides it
    Element baseTag = document.getFirstElement(HTMLElementName.BASE);
    if (baseTag != null) {
        if (getLogger().isDebugEnabled()) {
            getLogger().debug("Base tag was found in HTML: " + baseTag.getDebugInfo());
        }
        String baseHref = baseTag.getAttributeValue("href");
        boolean hasUsableHref = baseHref != null && !baseHref.isEmpty();
        if (hasUsableHref) {
            baseURL = URLCanonicalizer.getCanonicalURL(baseHref, baseURL);
        }
    }

    // The document itself
    parseSource(message, document, depth, baseURL);

    if (!params.isParseComments()) {
        return false;
    }

    // The comments: first as HTML, then (if that found nothing) as plain text
    for (StartTag commentTag : document.getAllStartTags(StartTagType.COMMENT)) {
        Source commentSource = new Source(commentTag.getTagContent());
        if (parseSource(message, commentSource, depth, baseURL)) {
            continue;
        }
        Matcher urlMatcher = PLAIN_COMMENTS_URL_PATTERN.matcher(commentSource.toString());
        while (urlMatcher.find()) {
            processURL(message, depth, urlMatcher.group(), baseURL);
        }
    }
    return false;
}

Also used : Matcher(java.util.regex.Matcher) Element(net.htmlparser.jericho.Element) Source(net.htmlparser.jericho.Source) StartTag(net.htmlparser.jericho.StartTag)

Example 29 with Element

Use of net.htmlparser.jericho.Element in the project zaproxy, by zaproxy.

In the class SpiderHtmlParser, method parseSource:

/**
 * Looks through the HTML Jericho source for elements that reference other resources.
 *
 * <p>The element types are handled in a fixed order: {@code A} and {@code AREA}
 * ({@code href}, {@code ping}), {@code FRAME} and {@code IFRAME} ({@code src}), {@code LINK}
 * ({@code href}), {@code SCRIPT} and {@code IMG} ({@code src}), and finally {@code META}
 * elements whose {@code http-equiv} is {@code refresh} or {@code location}.
 *
 * @param message the message
 * @param source the source
 * @param depth the depth
 * @param baseURL the base url
 * @return {@code true} if at least one URL was found, {@code false} otherwise.
 */
private boolean parseSource(HttpMessage message, Source source, int depth, String baseURL) {
    getLogger().debug("Parsing an HTML message...");
    boolean found = false;

    // Elements whose URLs live directly in an attribute
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.A, "href", "ping");
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.AREA, "href", "ping");
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.FRAME, "src");
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.IFRAME, "src");
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.LINK, "href");
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.SCRIPT, "src");
    found |= processElementsOfType(message, source, depth, baseURL, HTMLElementName.IMG, "src");

    // META elements: the URL is embedded in the content attribute, e.g.
    // http-equiv="location" content="url=http://foo.bar/..."
    for (Element meta : source.getAllElements(HTMLElementName.META)) {
        String httpEquiv = meta.getAttributeValue("http-equiv");
        String content = meta.getAttributeValue("content");
        if (httpEquiv == null || content == null) {
            continue;
        }
        boolean mayCarryUrl = httpEquiv.equalsIgnoreCase("refresh") || httpEquiv.equalsIgnoreCase("location");
        if (!mayCarryUrl) {
            continue;
        }
        Matcher urlMatcher = urlPattern.matcher(content);
        if (urlMatcher.find()) {
            processURL(message, depth, urlMatcher.group(1), baseURL);
            found = true;
        }
    }
    return found;
}

/**
 * Runs {@code processAttributeElement} for each of the given attributes on every element of the
 * given type, element by element and in the attribute order supplied.
 *
 * @return {@code true} if any of the calls reported {@code true}, {@code false} otherwise.
 */
private boolean processElementsOfType(HttpMessage message, Source source, int depth, String baseURL, String elementName, String... attributeNames) {
    boolean found = false;
    for (Element element : source.getAllElements(elementName)) {
        for (String attributeName : attributeNames) {
            // Every attribute is always processed, even after a URL has been found
            if (processAttributeElement(message, depth, baseURL, element, attributeName)) {
                found = true;
            }
        }
    }
    return found;
}

Also used : Matcher(java.util.regex.Matcher) Element(net.htmlparser.jericho.Element)

Example 30 with Element

Use of net.htmlparser.jericho.Element in the project liferay-ide, by liferay.

In the class AdminUtil, method getKBArticleDiff:

/**
 * Builds an HTML diff of one property of a knowledge base article between two versions, with
 * the diff markers turned into inline styles.
 *
 * <p>{@code sourceVersion} is raised to {@code KBArticleConstants.DEFAULT_VERSION} when it is
 * lower. If both versions are then the same, the property value of that version is returned
 * as is. Otherwise the two property values are diffed with {@code DiffHtmlUtil} and, for every
 * element of the result whose {@code changeType} or {@code class} attribute contains one of
 * the recognised diff markers, a {@code style} attribute is written that holds the highlight
 * rules followed by any style the element already had.
 *
 * @param resourcePrimKey the resource primary key of the article.
 * @param sourceVersion the version to diff from.
 * @param targetVersion the version to diff to.
 * @param param the name of the article bean property to diff.
 * @return the property value itself when the versions are the same, the styled diff otherwise.
 * @throws Exception if the articles cannot be obtained or diffed.
 */
public static String getKBArticleDiff(long resourcePrimKey, int sourceVersion, int targetVersion, String param) throws Exception {
    if (sourceVersion < KBArticleConstants.DEFAULT_VERSION) {
        sourceVersion = KBArticleConstants.DEFAULT_VERSION;
    }
    if (sourceVersion == targetVersion) {
        KBArticle kbArticle = KBArticleLocalServiceUtil.getKBArticle(resourcePrimKey, targetVersion);
        return BeanPropertiesUtil.getString(kbArticle, param);
    }
    KBArticle sourceKBArticle = KBArticleLocalServiceUtil.getKBArticle(resourcePrimKey, sourceVersion);
    KBArticle targetKBArticle = KBArticleLocalServiceUtil.getKBArticle(resourcePrimKey, targetVersion);
    String sourceHtml = BeanPropertiesUtil.getString(sourceKBArticle, param);
    String targetHtml = BeanPropertiesUtil.getString(targetKBArticle, param);
    String diff = DiffHtmlUtil.diff(new UnsyncStringReader(sourceHtml), new UnsyncStringReader(targetHtml));
    Source source = new Source(diff);
    OutputDocument outputDocument = new OutputDocument(source);
    for (Element element : source.getAllElements()) {
        Attributes attributes = element.getAttributes();
        if (attributes == null) {
            // Nothing to restyle on elements without an attribute list
            continue;
        }
        // At most four pieces are appended: one border rule, up to two class rules and the
        // pre-existing style.
        StringBundler sb = new StringBundler(4);
        Attribute changeTypeAttribute = attributes.get("changeType");
        if (changeTypeAttribute != null) {
            // An attribute written without a value has a null value; treat it as no marker
            String changeTypeValue = changeTypeAttribute.getValue();
            if (changeTypeValue == null) {
                // no marker to translate
            } else if (changeTypeValue.contains("diff-added-image")) {
                sb.append("border: 10px solid #CFC; ");
            } else if (changeTypeValue.contains("diff-changed-image")) {
                sb.append("border: 10px solid #C6C6FD; ");
            } else if (changeTypeValue.contains("diff-removed-image")) {
                sb.append("border: 10px solid #FDC6C6; ");
            }
        }
        Attribute classAttribute = attributes.get("class");
        if (classAttribute != null) {
            // Same null handling as for changeType
            String classValue = classAttribute.getValue();
            if (classValue == null) {
                // no marker to translate
            } else if (classValue.contains("diff-html-added")) {
                sb.append("background-color: #CFC; ");
            } else if (classValue.contains("diff-html-changed")) {
                sb.append("background-color: #C6C6FD; ");
            } else if (classValue.contains("diff-html-removed")) {
                sb.append("background-color: #FDC6C6; ");
                sb.append("text-decoration: line-through; ");
            }
        }
        if (Validator.isNull(sb.toString())) {
            // No diff marker on this element, leave it untouched
            continue;
        }
        // Keep whatever style the element already had, after the highlight rules
        Attribute styleAttribute = attributes.get("style");
        if (styleAttribute != null) {
            sb.append(GetterUtil.getString(styleAttribute.getValue()));
        }
        // The returned map backs the replacement attribute list in the output document
        Map<String, String> map = outputDocument.replace(attributes, false);
        map.put("style", sb.toString());
    }
    return outputDocument.toString();
}

Also used : KBArticle(com.liferay.knowledgebase.model.KBArticle) Attribute(net.htmlparser.jericho.Attribute) UnsyncStringReader(com.liferay.portal.kernel.io.unsync.UnsyncStringReader) OutputDocument(net.htmlparser.jericho.OutputDocument) Element(net.htmlparser.jericho.Element) Attributes(net.htmlparser.jericho.Attributes) Source(net.htmlparser.jericho.Source) StringBundler(com.liferay.portal.kernel.util.StringBundler)

Aggregations

Element (net.htmlparser.jericho.Element): 70, Source (net.htmlparser.jericho.Source): 17, DownloadService (delta.games.lotro.utils.DownloadService): 11, ArrayList (java.util.ArrayList): 11, Segment (net.htmlparser.jericho.Segment): 6, InputSource (org.xml.sax.InputSource): 6, Context (com.cflint.plugins.Context): 4, Matcher (java.util.regex.Matcher): 4, StartTag (net.htmlparser.jericho.StartTag): 4, BasicStatsSet (delta.games.lotro.character.stats.BasicStatsSet): 3, Item (delta.games.lotro.lore.items.Item): 3, List (java.util.List): 3, Attribute (net.htmlparser.jericho.Attribute): 3, CFScriptStatement (cfml.parsing.cfscript.script.CFScriptStatement): 2, ParseException (cfml.parsing.reporting.ParseException): 2, CFLintScanException (com.cflint.exception.CFLintScanException): 2, ContextMessage (com.cflint.plugins.Context.ContextMessage): 2, Money (delta.games.lotro.common.Money): 2, CraftingResult (delta.games.lotro.lore.crafting.recipes.CraftingResult): 2, Ingredient (delta.games.lotro.lore.crafting.recipes.Ingredient): 2