Search in sources :

Example 1 with Whitelist

Use of org.jsoup.safety.Whitelist in project MusicDNA by harjot-oberai.

The class LyricWiki, method fromURL.

/**
 * Fetches the page at {@code url} and extracts the lyrics from its
 * {@code div.lyricBox} element.
 *
 * <p>All markup except {@code <br>} tags is stripped from the lyrics, numeric
 * character references are unescaped, and bracketed numeric markers such as
 * {@code [1]} or {@code [12]} are removed.
 *
 * @param url    address of the lyrics page; a URL ending in {@code action=edit}
 *               is answered with a {@code NO_RESULT} lyrics object without any
 *               network access (presumably such links point at pages that do
 *               not exist yet; confirm against the caller)
 * @param artist artist name, or {@code null} to take it from the part of the
 *               page title before the first {@code ':'}
 * @param song   song title, or {@code null} to take it from the page title
 *               between the first {@code ':'} and the last {@code "Lyrics"}
 * @return a {@code Lyrics} built with {@code POSITIVE_RESULT} when lyrics were
 *         extracted; {@code NEGATIVE_RESULT} when the page only carries the
 *         "not licensed" notice or the instrumental marker; {@code NO_RESULT}
 *         when fewer than three characters were extracted; {@code ERROR} when
 *         the download fails or an expected page element is missing
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }
    // Keep the caller-supplied values: artist/song may be replaced below.
    final String originalArtist = artist;
    final String originalTitle = song;
    String text;
    try {
        Document lyricsPage = Jsoup.connect(url).get();
        Element lyricbox = lyricsPage.select("div.lyricBox").get(0);
        lyricbox.getElementsByClass("references").remove();
        String lyricsHtml = lyricbox.html();
        // Pretty-printing is disabled so the cleaner does not reformat the text.
        final Document.OutputSettings outputSettings = new Document.OutputSettings().prettyPrint(false);
        text = Jsoup.clean(lyricsHtml, "", new Whitelist().addTags("br"), outputSettings);
        if (text.contains("&#")) {
            text = Parser.unescapeEntities(text, true);
        }
        // Strip bracketed numeric markers of any length ([1], [12], ...).
        text = text.replaceAll("\\[\\d+\\]", "").trim();
        String title = lyricsPage.getElementsByTag("title").get(0).text();
        int colon = title.indexOf(':');
        if (artist == null) {
            artist = title.substring(0, colon).trim();
        }
        if (song == null) {
            int end = title.lastIndexOf("Lyrics");
            song = title.substring(colon + 1, end).trim();
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        // Covers a failed download, a missing lyric box or <title> element, and a
        // page title without the expected ':' / "Lyrics" parts (substring bounds).
        return new Lyrics(ERROR);
    }
    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException | IllegalArgumentException e) {
        // Decoding is best-effort: URLDecoder rejects malformed percent sequences
        // (e.g. a literal '%' in a name) with IllegalArgumentException. Keep
        // whatever values were not decoded instead of failing the whole lookup.
        e.printStackTrace();
    }
    if (text.contains("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") || text.equals("Instrumental <br />")) {
        Lyrics result = new Lyrics(NEGATIVE_RESULT);
        result.setArtist(artist);
        result.setTitle(song);
        return result;
    }
    // The empty string is included in this check (length 0 < 3).
    if (text.length() < 3) {
        return new Lyrics(NO_RESULT);
    }
    Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
    lyrics.setArtist(artist);
    lyrics.setTitle(song);
    lyrics.setOriginalArtist(originalArtist);
    lyrics.setOriginalTitle(originalTitle);
    lyrics.setText(text);
    lyrics.setSource("LyricsWiki");
    lyrics.setURL(url);
    return lyrics;
}
Also used : Element(org.jsoup.nodes.Element) Whitelist(org.jsoup.safety.Whitelist) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Net.getUrlAsString(com.sdsmdg.harjot.MusicDNA.utilities.Net.getUrlAsString) IOException(java.io.IOException) Document(org.jsoup.nodes.Document)

Aggregations

Net.getUrlAsString (com.sdsmdg.harjot.MusicDNA.utilities.Net.getUrlAsString)1 IOException (java.io.IOException)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 Document (org.jsoup.nodes.Document)1 Element (org.jsoup.nodes.Element)1 Whitelist (org.jsoup.safety.Whitelist)1