Use of com.gargoylesoftware.htmlunit.html.HtmlListItem in project core by z1lc.
From the class MovieEtl, method getObjects.
/**
 * Loads the {@code wts} listing page for {@code USER_ID} from rottentomatoes.com and turns every
 * {@code <li>} whose class attribute contains {@code bottom_divider} into a {@link Movie}.
 *
 * <p>The HtmlUnit client is obtained from {@code CommonProvider} and closed when the method
 * exits; the page fetch itself goes through {@code CommonProvider.retrying()}.
 *
 * @return one {@link Movie} per matching list item, in document order
 * @throws java.util.NoSuchElementException if a list item has no {@code span} whose class
 *     attribute is exactly {@code tMeterScore}
 * @throws NumberFormatException if a score or matched year cannot be parsed as a long
 */
@Override
public List<Movie> getObjects() {
    URI listingUri = Unchecked.get(() -> new URIBuilder()
            .setScheme("https")
            .setHost("rottentomatoes.com")
            .setPath(String.format("user/id/%s/wts/", USER_ID))
            .setParameter("mediaType", "1")
            .setParameter("wtsni", "wts")
            .build());
    try (WebClient client = CommonProvider.getHtmlUnitWebClient()) {
        HtmlPage listingPage = CommonProvider.retrying().get(() -> client.getPage(listingUri.toURL()));
        List<HtmlListItem> entries = listingPage.getByXPath("//li[contains(@class, 'bottom_divider')]");
        return entries.stream()
                .map(this::movieFromListItem)
                .collect(Collectors.toList());
    }
}

/**
 * Builds a {@link Movie} from a single list item of the listing page.
 *
 * <p>Title and link come from the item's first {@code <a>} element, the rating from the first
 * {@code span} with class {@code tMeterScore} (with any {@code %} removed), and the year from
 * the first {@code yearRegex} match in the item's normalized text ({@code null} when there is
 * no match).
 */
private Movie movieFromListItem(HtmlListItem item) {
    // NOTE(review): get(0) presumably fails if the item contains no <a> element - confirm the
    // markup always has one.
    HtmlElement anchor = item.getElementsByTagName("a").get(0);
    String href = anchor.getAttribute("href");
    String title = anchor.getAttribute("title");

    String scoreText = item.getElementsByTagName("span").stream()
            .filter(span -> span.getAttribute("class").equals("tMeterScore"))
            .findFirst()
            .orElseThrow()
            .asNormalizedText();

    Matcher yearMatcher = yearRegex.matcher(item.asNormalizedText());
    Long year = yearMatcher.find() ? Long.valueOf(yearMatcher.group()) : null;

    // The id is taken before the rating is parsed, so an id is consumed even when the score
    // text turns out not to be numeric.
    // NOTE(review): if href already starts with '/', the URL below contains "//" - confirm
    // against the live markup.
    return Movie.MovieBuilder.aMovie()
            .withId(ID_ISSUER.getAndIncrement())
            .withTitle(title)
            .withUrl("https://www.rottentomatoes.com/" + href)
            .withRating(Long.valueOf(scoreText.replace("%", "")))
            .withYear(year)
            .build();
}
Aggregations