Search in sources :

Example 1 with Source

use of net.htmlparser.jericho.Source in project vue-gwt by Axellience.

the class TemplateParser method parseHtmlTemplate.

/**
 * Parse an HTML template and return a result object holding the transformed HTML.
 * <p>
 * The context, logger, output document and result are stored in fields of this parser and are
 * replaced on every call.
 * @param htmlTemplate The HTML template to process, as a String
 * @param context Context of the Component we are currently processing
 * @param messager Used to report errors in template during Annotation Processing
 * @return A {@link TemplateParserResult} containing the processed template and expressions
 */
public TemplateParserResult parseHtmlTemplate(String htmlTemplate, TemplateParserContext context, Messager messager) {
    // Per-call setup: context and logger first, since the Jericho configuration receives the logger.
    this.context = context;
    logger = new TemplateParserLogger(context, messager);
    initJerichoConfig(logger);

    // The output document is built on top of the parsed source.
    final Source parsedTemplate = new Source(htmlTemplate);
    this.outputDocument = new OutputDocument(parsedTemplate);
    this.result = new TemplateParserResult(context);

    // Imports are handled before the top-level elements are visited.
    processImports(parsedTemplate);
    parsedTemplate.getChildElements().forEach(element -> processElement(element));

    final String transformedHtml = outputDocument.toString();
    result.setProcessedTemplate(transformedHtml);
    return result;
}
Also used : OutputDocument(net.htmlparser.jericho.OutputDocument) TemplateParserResult(com.axellience.vuegwt.processors.component.template.parser.result.TemplateParserResult) Source(net.htmlparser.jericho.Source)

Example 2 with Source

use of net.htmlparser.jericho.Source in project lotro-tools by dmorcellet.

the class AreaPageParser method parseAreaPage.

/**
 * Parse the lorebook page of an area.
 * <p>
 * Downloads <code>http://lorebook.lotro.com/wiki/Area:&lt;identifier&gt;</code>, reads the area
 * name from the <code>lorebooktitle</code> div (removing the <code>TITLE_SEED</code> prefix when
 * present) and, when the <code>region_quests_table</code> table is found, hands it to
 * <code>parseQuests</code>.
 * @param identifier Identifier of the area.
 * @return An area or <code>null</code> if an error occurred.
 */
public Area parseAreaPage(String identifier) {
    Area ret = null;
    String url = "http://lorebook.lotro.com/wiki/Area:" + identifier;
    try {
        DownloadService downloader = DownloadService.getInstance();
        String page = downloader.getPage(url);
        Source source = new Source(page);
        // <div class="lorebooktitle">Area: Bindbole Wood</div>
        Element titleTag = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(source, HTMLElementName.DIV, "class", "lorebooktitle");
        String name = "";
        if (titleTag != null) {
            name = CharacterReference.decodeCollapseWhiteSpace(titleTag.getContent());
            if (name.startsWith(TITLE_SEED)) {
                name = name.substring(TITLE_SEED.length()).trim();
            }
        }
        // The working area is kept in a field while the quests table is parsed
        _area = new Area(identifier, name);
        // <table id="region_quests_table"
        Element questsTable = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(source, HTMLElementName.TABLE, "id", "region_quests_table");
        if (questsTable != null) {
            parseQuests(questsTable);
        }
        ret = _area;
    } catch (Exception e) {
        _logger.error("Cannot parse area page [" + url + "]", e);
    } finally {
        // Reset on every path, so that a failure does not leave a stale area in the field
        _area = null;
    }
    return ret;
}
Also used : Area(delta.games.lotro.lore.region.Area) Element(net.htmlparser.jericho.Element) DownloadService(delta.games.lotro.utils.DownloadService) Source(net.htmlparser.jericho.Source)

Example 3 with Source

use of net.htmlparser.jericho.Source in project lotro-tools by dmorcellet.

the class RegionPageParser method parseRegionPage.

/**
 * Parse the lorebook page of a region.
 * <p>
 * Downloads <code>http://lorebook.lotro.com/wiki/Region:&lt;identifier&gt;</code>, reads the region
 * name from the <code>lorebooktitle</code> div (removing the <code>TITLE_SEED</code> prefix when
 * present) and, when the <code>regionAreas widget ui-corner-all</code> div is found, hands it to
 * <code>parseAreas</code>.
 * @param identifier Identifier of the region.
 * @return A region or <code>null</code> if an error occurred.
 */
public Region parseRegionPage(String identifier) {
    Region ret = null;
    String url = "http://lorebook.lotro.com/wiki/Region:" + identifier;
    try {
        DownloadService downloader = DownloadService.getInstance();
        String page = downloader.getPage(url);
        Source source = new Source(page);
        // <div class="lorebooktitle">Region: The Shire</div>
        Element titleTag = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(source, HTMLElementName.DIV, "class", "lorebooktitle");
        String name = "";
        if (titleTag != null) {
            name = CharacterReference.decodeCollapseWhiteSpace(titleTag.getContent());
            if (name.startsWith(TITLE_SEED)) {
                name = name.substring(TITLE_SEED.length()).trim();
            }
        }
        // The working region is kept in a field while the areas block is parsed
        _region = new Region(identifier, name);
        // <div class="regionAreas widget ui-corner-all">
        Element regionAreas = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(source, HTMLElementName.DIV, "class", "regionAreas widget ui-corner-all");
        if (regionAreas != null) {
            parseAreas(regionAreas);
        }
        ret = _region;
    } catch (Exception e) {
        _logger.error("Cannot parse region page [" + url + "]", e);
    } finally {
        // Reset on every path, so that a failure does not leave a stale region in the field
        _region = null;
    }
    return ret;
}
Also used : Element(net.htmlparser.jericho.Element) Region(delta.games.lotro.lore.region.Region) DownloadService(delta.games.lotro.utils.DownloadService) Source(net.htmlparser.jericho.Source)

Example 4 with Source

use of net.htmlparser.jericho.Source in project lotro-tools by dmorcellet.

the class ItemPageParser method findIdentifiers.

/**
 * Fetch the wiki "edit" page for the current key and extract identifiers from its source text.
 * <p>
 * The text of the <code>wpTextbox1</code> textarea is passed to <code>parsePageSource</code>
 * together with the given items. A warning is logged when the textarea is missing, and any
 * exception is logged and not propagated.
 * @param items Items handed over to <code>parsePageSource</code>.
 */
private void findIdentifiers(List<Item> items) {
    final String editUrl = "http://lorebook.lotro.com/index.php?title=" + _key + "&action=edit";
    final DownloadService service = DownloadService.getInstance();
    try {
        final String html = service.getPage(editUrl);
        final Source editPage = new Source(html);
        // <textarea id="wpTextbox1"
        final Element textArea = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(editPage, HTMLElementName.TEXTAREA, "id", "wpTextbox1");
        if (textArea == null) {
            _logger.warn("Cannot find identifiers!");
            return;
        }
        final String wikiText = JerichoHtmlUtils.getTextFromTag(textArea);
        parsePageSource(wikiText, items);
    } catch (Exception e) {
        _logger.error("Parsing error", e);
    }
}
Also used : Element(net.htmlparser.jericho.Element) DownloadService(delta.games.lotro.utils.DownloadService) Source(net.htmlparser.jericho.Source) InputSource(org.xml.sax.InputSource)

Example 5 with Source

use of net.htmlparser.jericho.Source in project lotro-tools by dmorcellet.

the class QuestPageParser method parseQuestPage.

/**
 * Parse the quest page at the given URL.
 * <p>
 * Each <code>lorebookquest</code> div found inside the <code>lorebookNoedit</code> div is parsed
 * into a quest description, which receives the key read from the <code>ca-nstab-quest</code> link
 * (or <code>null</code> when that link is missing or does not start with
 * <code>QUEST_URL_SEED</code>).
 * @param url URL of quest page.
 * @return A list of quests, or <code>null</code> if an error occurred or if the page has no
 * <code>lorebookNoedit</code> div.
 */
public List<QuestDescription> parseQuestPage(String url) {
    try {
        String html = DownloadService.getInstance().getPage(url);
        Source document = new Source(html);
        // <div id="lorebookNoedit">
        Element container = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(document, HTMLElementName.DIV, "id", "lorebookNoedit");
        if (container == null) {
            return null;
        }

        // identifier
        // <a id="ca-nstab-quest" class="lorebook_action_link" href="/wiki/Quest:A_Feminine_Curve_to_the_Steel">Article</a>
        _key = null;
        Element link = JerichoHtmlUtils.findElementByTagNameAndAttributeValue(document, HTMLElementName.A, "id", "ca-nstab-quest");
        String href = (link != null) ? link.getAttributeValue("href") : null;
        if ((href != null) && (href.startsWith(QUEST_URL_SEED))) {
            _key = href.substring(QUEST_URL_SEED.length()).trim();
        }

        List<QuestDescription> parsed = new ArrayList<QuestDescription>();
        List<Element> sections = JerichoHtmlUtils.findElementsByTagNameAndAttributeValue(container, HTMLElementName.DIV, "class", "lorebookquest");
        if (sections != null) {
            for (Element section : sections) {
                QuestDescription description = parseQuestSection(section);
                if (description == null) {
                    // Sections that yield no quest are skipped
                    continue;
                }
                parsed.add(description);
                description.setKey(_key);
            }
        }
        findIdentifiers(parsed);
        return parsed;
    } catch (Exception e) {
        _logger.error("Cannot parse quest page [" + url + "]", e);
        return null;
    }
}
Also used : QuestDescription(delta.games.lotro.lore.quests.QuestDescription) Element(net.htmlparser.jericho.Element) DownloadService(delta.games.lotro.utils.DownloadService) InputSource(org.xml.sax.InputSource) Source(net.htmlparser.jericho.Source)

Aggregations

- Source (net.htmlparser.jericho.Source): 108
- HttpMessage (org.parosproxy.paros.network.HttpMessage): 87
- Test (org.junit.jupiter.api.Test): 84
- SpiderParam (org.zaproxy.zap.spider.SpiderParam): 26
- Element (net.htmlparser.jericho.Element): 16
- DownloadService (delta.games.lotro.utils.DownloadService): 11
- URI (org.apache.commons.httpclient.URI): 9
- InputSource (org.xml.sax.InputSource): 6
- DefaultValueGenerator (org.zaproxy.zap.model.DefaultValueGenerator): 6
- HistoryReference (org.parosproxy.paros.model.HistoryReference): 4
- FileInputStream (java.io.FileInputStream): 2
- URL (java.net.URL): 2
- ArrayList (java.util.ArrayList): 2
- Date (java.util.Date): 2
- Attribute (net.htmlparser.jericho.Attribute): 2
- OutputDocument (net.htmlparser.jericho.OutputDocument): 2
- DatabaseException (org.parosproxy.paros.db.DatabaseException): 2
- HttpMalformedHeaderException (org.parosproxy.paros.network.HttpMalformedHeaderException): 2
- TemplateParserResult (com.axellience.vuegwt.processors.component.template.parser.result.TemplateParserResult): 1
- KBArticle (com.liferay.knowledgebase.model.KBArticle): 1