Search in sources :

Example 1 with TextExtractor

use of net.htmlparser.jericho.TextExtractor in project lotro-tools by dmorcellet.

the class CharacterLogPageParser method parseLogItem.

/**
 * Builds a character log item from one table row of the character log page.
 * <p>
 * The row is only used when it holds exactly three TD cells: the first one
 * carries the character name (read only while {@code _characterName} is
 * still unset), the second one a date written as {@code year/month/day},
 * and the third one the details (an icon and a label, optionally wrapped
 * in a link).
 *
 * @param tr table row element to read.
 * @return the parsed item, or {@code null} when the row does not hold three
 *         cells, when no item type could be determined, or when building
 *         the item failed (the failure is logged).
 */
private CharacterLogItem parseLogItem(Element tr) {
    /*
     * Sample row without a link:
     *
     * <td class="char">
     * <a href="/home/character/2427907/146366987891794854">Glumlug</a>
     * </td>
     * <td class="date">2011/11/24</td>
     * <td class="details">
     * <img src="http://content.turbine.com/sites/playerportal/modules/lotro-base/images/icons/log/icon_levelup.png">
     * Reached level 70
     * </td>
     *
     * Sample row whose details are wrapped in a link:
     *
     * <td class="char">
     * <a href="/home/character/2427907/146366987891794854">Glumlug</a>
     * </td>
     * <td class="date">2011/11/24</td>
     * <td class="details">
     * <a href="http://lorebook.lotro.com/wiki/Special:LotroResource?id=1879208735">
     * <img src="http://content.turbine.com/sites/playerportal/modules/lotro-base/images/icons/log/icon_quest.png">
     * Completed 'The Practiced Arm'
     * </a>
     * </td>
     */
    List<Element> cells = tr.getAllElements(HTMLElementName.TD);
    if ((cells == null) || (cells.size() != 3)) {
        return null;
    }

    // The character name is only read from the first row that provides it.
    if (_characterName == null) {
        _characterName = JerichoHtmlUtils.getTagContents(cells.get(0), HTMLElementName.A);
    }

    String dateStr = CharacterReference.decodeCollapseWhiteSpace(cells.get(1).getContent());

    Element detailsCell = cells.get(2);

    // The item type is derived from the icon, and only when there is exactly one icon.
    Element icon = null;
    LogItemType type = null;
    List<Element> icons = detailsCell.getAllElements(HTMLElementName.IMG);
    if ((icons != null) && (icons.size() == 1)) {
        icon = icons.get(0);
        type = findType(icon.getAttributeValue("src"));
    }

    Element link = detailsCell.getFirstElement(HTMLElementName.A);
    String url = (link != null) ? link.getAttributeValue("href") : null;

    // NOTE(review): in Jericho, TextExtractor.excludeElement looks like an
    // overridable predicate rather than a configuration call - confirm
    // whether the two calls below have any effect on the extracted text.
    TextExtractor extractor = detailsCell.getTextExtractor();
    if (icon != null) {
        extractor.excludeElement(icon.getStartTag());
    }
    if (link != null) {
        extractor.excludeElement(link.getStartTag());
    }
    String label = extractor.toString().trim();

    if ((dateStr == null) || (type == null) || (label == null)) {
        return null;
    }

    try {
        Calendar calendar = GregorianCalendar.getInstance(TimeZone.getTimeZone("GMT"));
        String[] parts = dateStr.split("/");
        int year = Integer.parseInt(parts[0]);
        int month = Integer.parseInt(parts[1]);
        int day = Integer.parseInt(parts[2]);
        String tunedLabel = tuneLabel(label);
        String trimmedUrl = (url != null) ? url.trim() : null;
        // Start from the epoch so that the time-of-day fields are zeroed,
        // then set the date (Calendar months are 0-based).
        calendar.setTimeInMillis(0);
        calendar.set(year, month - 1, day);
        long timestamp = calendar.getTimeInMillis();
        return new CharacterLogItem(timestamp, type, tunedLabel, trimmedUrl);
    } catch (Exception e) {
        _logger.error("Cannot parse LOTRO character log item!", e);
        return null;
    }
}
Also used : Element(net.htmlparser.jericho.Element) GregorianCalendar(java.util.GregorianCalendar) Calendar(java.util.Calendar) TextExtractor(net.htmlparser.jericho.TextExtractor) CharacterLogItem(delta.games.lotro.character.log.CharacterLogItem) LogItemType(delta.games.lotro.character.log.CharacterLogItem.LogItemType)

Example 2 with TextExtractor

use of net.htmlparser.jericho.TextExtractor in project lotro-tools by dmorcellet.

the class RewardsHTMLParser method parseItemReward.

/**
 * Reads the item rewards found in a reward block and adds them to the
 * given rewards.
 * <p>
 * The nested DIVs are walked in order. A DIV holding a STRONG element
 * selects the target set ("receive" or "select one of"); a DIV holding
 * exactly two links (icon link, then text link) describes one item, with
 * an optional quantity written as {@code (xN)} in the DIV text. Items met
 * before any known key are not stored; a warning is logged instead.
 *
 * @param rewardDiv reward block element to read.
 * @param rewards storage for the items found.
 */
private void parseItemReward(Element rewardDiv, Rewards rewards) {
    /*
     * Sample markup:
     *
     * <div class="questReward">
     * <div>
     * <strong>Receive:</strong>
     * </div>
     * <div>
     * <a href="/wiki/Item:Drownholt_Compass">
     * <img class="icon" rel="" src="http://content.turbine.com/sites/lorebook.lotro.com/images/icons/item/device/it_tracking_mom_book9_chapter5.png">
     * </a>
     * <a href="/wiki/Item:Drownholt_Compass">Drownholt Compass</a>
     * &nbsp;(x5)
     * </div>
     * </div>
     */
    List<Element> divs = rewardDiv.getAllElements(HTMLElementName.DIV);
    if ((divs == null) || (divs.size() <= 1)) {
        // Nothing beyond the reward div itself: no item to read.
        return;
    }
    ObjectsSet objects = null;
    // Skip the first entry (expected to be the reward div itself) through a
    // view, instead of removing it from the list handed out by the parser:
    // that list may be empty or unmodifiable.
    for (Element div : divs.subList(1, divs.size())) {
        List<Element> strongs = div.getAllElements(HTMLElementName.STRONG);
        if (!strongs.isEmpty()) {
            // Key line: choose the set that receives the following items.
            String key = CharacterReference.decodeCollapseWhiteSpace(strongs.get(0).getContent());
            key = cleanupFieldName(key);
            if (RECEIVE_KEY.equals(key)) {
                objects = rewards.getObjects();
            } else if (SELECT_ONE_OF_KEY.equals(key)) {
                objects = rewards.getSelectObjects();
            } else {
                _logger.warn(_objectId + ": unmanaged object selection key [" + key + "]");
            }
            continue;
        }
        List<Element> links = div.getAllElements(HTMLElementName.A);
        if (links.size() != 2) {
            continue;
        }
        // Item line: first link wraps the icon, second link carries name and URL.
        Element iconItem = links.get(0);
        String iconURL = null;
        List<Element> imgs = iconItem.getAllElements(HTMLElementName.IMG);
        if (imgs.size() == 1) {
            iconURL = imgs.get(0).getAttributeValue("src");
        }
        Element textItem = links.get(1);
        String itemName = CharacterReference.decodeCollapseWhiteSpace(textItem.getContent());
        String url = textItem.getAttributeValue("href");
        ObjectItem item = new ObjectItem(itemName);
        item.setObjectURL(url);
        item.setIconURL(iconURL);
        // NOTE(review): in Jericho, TextExtractor.excludeElement looks like an
        // overridable predicate rather than a configuration call - confirm
        // whether the two calls below have any effect on the extracted text.
        TextExtractor extractor = div.getTextExtractor();
        extractor.excludeElement(iconItem.getStartTag());
        extractor.excludeElement(textItem.getStartTag());
        String text = extractor.toString();
        // Quantity defaults to 1 unless a "(xN)" marker is found in the text.
        int quantity = 1;
        int factorIndex = text.indexOf("(x");
        if (factorIndex != -1) {
            int parenthesisIndex = text.indexOf(')', factorIndex + 2);
            if (parenthesisIndex != -1) {
                String factorStr = text.substring(factorIndex + 2, parenthesisIndex);
                quantity = NumericTools.parseInt(factorStr, 1);
            }
        }
        if (objects != null) {
            objects.addObject(item, quantity);
        } else {
            _logger.warn(_objectId + ": ignored object [" + item + "], quantity=" + quantity);
        }
    }
}
Also used : ObjectItem(delta.games.lotro.common.objects.ObjectItem) Element(net.htmlparser.jericho.Element) ObjectsSet(delta.games.lotro.common.objects.ObjectsSet) TextExtractor(net.htmlparser.jericho.TextExtractor)

Aggregations

Element (net.htmlparser.jericho.Element)2 TextExtractor (net.htmlparser.jericho.TextExtractor)2 CharacterLogItem (delta.games.lotro.character.log.CharacterLogItem)1 LogItemType (delta.games.lotro.character.log.CharacterLogItem.LogItemType)1 ObjectItem (delta.games.lotro.common.objects.ObjectItem)1 ObjectsSet (delta.games.lotro.common.objects.ObjectsSet)1 Calendar (java.util.Calendar)1 GregorianCalendar (java.util.GregorianCalendar)1