use of net.htmlparser.jericho.TextExtractor in project lotro-tools by dmorcellet.
the class CharacterLogPageParser method parseLogItem.
/**
 * Parse a single row of the character log table.
 * <p>
 * The row is expected to hold exactly 3 cells: character name, date
 * (<code>yyyy/MM/dd</code>) and details (icon + label, optionally wrapped
 * in a link).
 * As a side effect, the character name is stored in
 * <code>_characterName</code> if it is not known yet.
 * @param tr Table row to parse.
 * @return A log item, or <code>null</code> if the row does not have 3 cells,
 * if the item type cannot be found from the icon, or if the date cannot be parsed.
 */
private CharacterLogItem parseLogItem(Element tr) {
  /*
  Sample row, without link:
  <td class="char">
  <a href="/home/character/2427907/146366987891794854">Glumlug</a>
  </td>
  <td class="date">2011/11/24</td>
  <td class="details">
  <img src="http://content.turbine.com/sites/playerportal/modules/lotro-base/images/icons/log/icon_levelup.png">
  Reached level 70
  </td>
  */
  /*
  Sample row, with link:
  <td class="char">
  <a href="/home/character/2427907/146366987891794854">Glumlug</a>
  </td>
  <td class="date">2011/11/24</td>
  <td class="details">
  <a href="http://lorebook.lotro.com/wiki/Special:LotroResource?id=1879208735">
  <img src="http://content.turbine.com/sites/playerportal/modules/lotro-base/images/icons/log/icon_quest.png">
  Completed 'The Practiced Arm'
  </a>
  </td>
  */
  List<Element> tds = tr.getAllElements(HTMLElementName.TD);
  if ((tds == null) || (tds.size() != 3)) {
    return null;
  }

  // Character name: only read once
  if (_characterName == null) {
    _characterName = JerichoHtmlUtils.getTagContents(tds.get(0), HTMLElementName.A);
  }

  String dateStr = CharacterReference.decodeCollapseWhiteSpace(tds.get(1).getContent());
  Element tdDetails = tds.get(2);

  // The item type is derived from the icon; it is only looked up when there is exactly one icon
  LogItemType type = null;
  List<Element> imgs = tdDetails.getAllElements(HTMLElementName.IMG);
  if ((imgs != null) && (imgs.size() == 1)) {
    type = findType(imgs.get(0).getAttributeValue("src"));
  }

  // Optional link
  String url = null;
  Element a = tdDetails.getFirstElement(HTMLElementName.A);
  if (a != null) {
    url = a.getAttributeValue("href");
    if (url != null) {
      url = url.trim();
    }
  }

  // Label: all the text of the details cell.
  // Note: TextExtractor.excludeElement(StartTag) is a predicate hook meant to be
  // overridden, not a configuration method, so calling it has no effect on the
  // extracted text. The link must not be excluded anyway, since the label
  // lives inside it when a link is present (see second sample above).
  String label = tdDetails.getTextExtractor().toString().trim();

  if ((dateStr == null) || (type == null)) {
    return null;
  }

  CharacterLogItem ret = null;
  try {
    String[] items = dateStr.split("/");
    int year = Integer.parseInt(items[0]);
    int month = Integer.parseInt(items[1]);
    int day = Integer.parseInt(items[2]);
    label = tuneLabel(label);
    // Build an explicit Gregorian calendar: Calendar.getInstance() may return
    // a non-Gregorian calendar depending on the default locale, which would
    // misinterpret the year.
    Calendar c = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
    // Reset all time fields so that the result is midnight GMT of the given day
    c.setTimeInMillis(0);
    // Calendar months are 0-based
    c.set(year, month - 1, day);
    long date = c.getTimeInMillis();
    ret = new CharacterLogItem(date, type, label, url);
  } catch (Exception e) {
    // Best effort: a malformed row is logged and skipped
    _logger.error("Cannot parse LOTRO character log item!", e);
  }
  return ret;
}
use of net.htmlparser.jericho.TextExtractor in project lotro-tools by dmorcellet.
the class RewardsHTMLParser method parseItemReward.
/**
 * Parse the item rewards found in a reward block, and register them
 * into the given rewards.
 * <p>
 * The child blocks are read in order: a block with a
 * <code>strong</code> tag selects the target objects set ("receive" or
 * "select one of"), and each following block with 2 links (icon, then name)
 * describes an item to add to the current set, with an optional
 * <code>(xN)</code> quantity suffix.
 * @param rewardDiv Reward block to parse.
 * @param rewards Storage for the loaded items.
 */
private void parseItemReward(Element rewardDiv, Rewards rewards) {
  /*
  Sample:
  <div class="questReward">
  <div>
  <strong>Receive:</strong>
  </div>
  <div>
  <a href="/wiki/Item:Drownholt_Compass">
  <img class="icon" rel="" src="http://content.turbine.com/sites/lorebook.lotro.com/images/icons/item/device/it_tracking_mom_book9_chapter5.png">
  </a>
  <a href="/wiki/Item:Drownholt_Compass">Drownholt Compass</a>
  (x5)
  </div>
  </div>
  */
  ObjectsSet objects = null;
  List<Element> divs = rewardDiv.getAllElements(HTMLElementName.DIV);
  // Start at index 1 to skip the reward div itself, without altering the
  // list given by the parser (and without failing on an empty list)
  for (int i = 1; i < divs.size(); i++) {
    Element div = divs.get(i);
    List<Element> strongs = div.getAllElements(HTMLElementName.STRONG);
    if (strongs.size() > 0) {
      // Header block: choose the objects set that receives the following items
      String key = CharacterReference.decodeCollapseWhiteSpace(strongs.get(0).getContent());
      key = cleanupFieldName(key);
      if (RECEIVE_KEY.equals(key)) {
        objects = rewards.getObjects();
      } else if (SELECT_ONE_OF_KEY.equals(key)) {
        objects = rewards.getSelectObjects();
      } else {
        // The previously selected set (if any) remains in use
        _logger.warn(_objectId + ": unmanaged object selection key [" + key + "]");
      }
      continue;
    }

    // Item block: expects exactly 2 links (icon link, then name link)
    List<Element> as = div.getAllElements(HTMLElementName.A);
    if ((as == null) || (as.size() != 2)) {
      continue;
    }
    Element iconItem = as.get(0);
    String iconURL = null;
    List<Element> imgs = iconItem.getAllElements(HTMLElementName.IMG);
    if ((imgs != null) && (imgs.size() == 1)) {
      iconURL = imgs.get(0).getAttributeValue("src");
    }
    Element textItem = as.get(1);
    String itemName = CharacterReference.decodeCollapseWhiteSpace(textItem.getContent());
    String url = textItem.getAttributeValue("href");
    ObjectItem item = new ObjectItem(itemName);
    item.setObjectURL(url);
    item.setIconURL(iconURL);

    // Quantity: optional "(xN)" suffix found after the item name.
    // Note: TextExtractor.excludeElement(StartTag) is a predicate hook meant to
    // be overridden, so calling it does not remove the links from the extracted
    // text: the text still contains the item name. The search for the quantity
    // marker therefore starts after the name (when it can be located), so that
    // a "(x" inside the name itself is not mistaken for a quantity.
    int quantity = 1;
    String text = div.getTextExtractor().toString();
    int searchFrom = 0;
    int nameIndex = text.indexOf(itemName);
    if (nameIndex != -1) {
      searchFrom = nameIndex + itemName.length();
    }
    int factorIndex = text.indexOf("(x", searchFrom);
    if (factorIndex != -1) {
      int parenthesisIndex = text.indexOf(')', factorIndex + 2);
      if (parenthesisIndex != -1) {
        String factorStr = text.substring(factorIndex + 2, parenthesisIndex);
        quantity = NumericTools.parseInt(factorStr, 1);
      }
    }

    if (objects != null) {
      objects.addObject(item, quantity);
    } else {
      // No header block seen so far: nowhere to store this item
      _logger.warn(_objectId + ": ignored object [" + item + "], quantity=" + quantity);
    }
  }
}
Aggregations