Use of org.jsoup.safety.Whitelist in the MusicDNA project by harjot-oberai.
Class LyricWiki, method fromURL.
/**
 * Downloads the page at {@code url} and extracts the lyrics from its first
 * {@code div.lyricBox} element.
 *
 * <p>The lyric box is reduced to plain text in which only {@code <br>} tags are kept,
 * numeric character references are unescaped, and bracketed numeric markers such as
 * {@code [1]} or {@code [12]} are stripped. When {@code artist} or {@code song} is
 * {@code null}, the missing value is taken from the page's {@code <title>}: the artist is
 * the text before the first {@code ':'}, the song is the text between that colon and the
 * last occurrence of {@code "Lyrics"}.
 *
 * @param url    address of the lyrics page; a URL ending in {@code "action=edit"} is not
 *               fetched and yields a {@code NO_RESULT} object
 * @param artist artist name as supplied by the caller, or {@code null} to read it from the
 *               page title
 * @param song   song title as supplied by the caller, or {@code null} to read it from the
 *               page title
 * @return a {@code Lyrics} object created with {@code POSITIVE_RESULT} carrying text and
 *         metadata on success; {@code NEGATIVE_RESULT} (artist and title only) when the page
 *         shows the "not licensed" notice or is marked instrumental; {@code NO_RESULT} when
 *         fewer than three characters of text were extracted; {@code ERROR} when the download
 *         fails or an expected element is missing from the page
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    String text;
    String originalArtist = artist;
    String originalTitle = song;
    try {
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        // Pretty-printing is disabled so the cleaner does not re-indent the lyrics.
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        if (text.contains("&#")) {
            text = Parser.unescapeEntities(text, true);
        }
        // \d+ so that multi-digit markers such as [10] are removed as well.
        text = text.replaceAll("\\[\\d+\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        // A title without ':' or "Lyrics" makes substring() throw
        // StringIndexOutOfBoundsException, which the catch below turns into ERROR.
        if (artist == null) {
            artist = title.substring(0, colon).trim();
        }
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        return new Lyrics(ERROR);
    }

    // Decoded independently so a malformed value in one does not affect the other.
    artist = decodeOrKeep(artist);
    song = decodeOrKeep(song);

    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.")
            || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    }
    if (text.length() < 3) {
        return new Lyrics(NO_RESULT);
    }
    Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
    lyrics.setArtist(artist);
    lyrics.setTitle(song);
    lyrics.setOriginalArtist(originalArtist);
    lyrics.setOriginalTitle(originalTitle);
    lyrics.setText(text);
    lyrics.setSource("LyricsWiki");
    lyrics.setURL(url);
    return lyrics;
}

/**
 * URL-decodes {@code value} as UTF-8, returning it unchanged when it cannot be decoded.
 *
 * <p>{@link URLDecoder#decode(String, String)} throws {@link IllegalArgumentException} on a
 * malformed percent escape (for example a literal {@code '%'} in a name such as "100%");
 * the raw value is kept in that case instead of failing the whole lookup.
 */
private static String decodeOrKeep(String value) {
    try {
        return URLDecoder.decode(value, "UTF-8");
    } catch (UnsupportedEncodingException | IllegalArgumentException e) {
        // UTF-8 is a mandatory charset, so in practice only malformed escapes end up here.
        return value;
    }
}
Aggregations