Search in sources :

Example 1 with HttpStatusException

use of org.jsoup.HttpStatusException in project MusicDNA by harjot-oberai.

the class Genius method fromURL.

/**
 * Downloads a Genius lyrics page and converts it into a {@link Lyrics} result.
 *
 * <p>The lyrics are read from the elements matching the {@code .lyrics} selector. Lines that
 * consist solely of a bracketed marker (matching {@code \[.+\]} once whitespace is removed) and
 * leading blank lines are dropped; the remaining lines are joined with {@code <br/>}.
 *
 * @param url    address of the lyrics page to fetch
 * @param artist artist name; when {@code null}, both artist and title are read from the page
 * @param title  song title; overwritten from the page when {@code artist} is {@code null}
 * @return a {@code Lyrics} flagged {@code NO_RESULT} when the server answers with an HTTP error
 *         status, {@code ERROR} on any other I/O failure or when the expected page elements are
 *         missing, {@code NEGATIVE_RESULT} when the text is exactly {@code [Instrumental]}, and
 *         {@code POSITIVE_RESULT} otherwise
 */
public static Lyrics fromURL(String url, String artist, String title) {
    Document lyricsPage;
    String text;
    try {
        lyricsPage = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        Elements lyricsDiv = lyricsPage.select(".lyrics");
        if (lyricsDiv.isEmpty())
            // Routed through the catch below so a missing lyrics node is logged and reported as ERROR.
            throw new StringIndexOutOfBoundsException();
        else
            // Keep only <br> tags so line breaks survive the clean-up.
            text = Jsoup.clean(lyricsDiv.html(), Whitelist.none().addTags("br")).trim();
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException | StringIndexOutOfBoundsException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    if (artist == null) {
        Elements titleElements = lyricsPage.getElementsByClass("text_title");
        Elements artistElements = lyricsPage.getElementsByClass("text_artist");
        // get(0) on an empty result would throw outside the try block above; report a page
        // without the expected metadata the same way as a page without lyrics.
        if (titleElements.isEmpty() || artistElements.isEmpty())
            return new Lyrics(Lyrics.ERROR);
        title = titleElements.get(0).text();
        artist = artistElements.get(0).text();
    }
    Lyrics result = new Lyrics(Lyrics.POSITIVE_RESULT);
    if ("[Instrumental]".equals(text))
        result = new Lyrics(Lyrics.NEGATIVE_RESULT);
    Pattern pattern = Pattern.compile("\\[.+\\]");
    StringBuilder builder = new StringBuilder();
    for (String line : text.split("<br> ")) {
        String strippedLine = line.replaceAll("\\s", "");
        // Skip bracket-only lines, and blank lines that would otherwise start the output.
        if (!pattern.matcher(strippedLine).matches() && !(strippedLine.isEmpty() && builder.length() == 0))
            builder.append(line.replaceAll("\\P{Print}", "")).append("<br/>");
    }
    // Remove the trailing "<br/>" (5 characters) appended after the last kept line.
    if (builder.length() > 5)
        builder.delete(builder.length() - 5, builder.length());
    result.setArtist(artist);
    result.setTitle(title);
    result.setText(Normalizer.normalize(builder.toString(), Normalizer.Form.NFD));
    result.setURL(url);
    result.setSource("Genius");
    return result;
}
Also used : Pattern(java.util.regex.Pattern) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 2 with HttpStatusException

use of org.jsoup.HttpStatusException in project MusicDNA by harjot-oberai.

the class AZLyrics method fromURL.

/**
 * Downloads an AZLyrics page and extracts the lyrics with regular expressions.
 *
 * <p>The lyrics are taken from the text following the {@code Sorry about that. -->} marker, cut
 * at the first {@code </div>} when one is present, with bracketed spans removed.
 *
 * @param url    address of the lyrics page to fetch
 * @param artist artist name; when this or {@code song} is {@code null}, both are re-read from
 *               the {@code ArtistName}/{@code SongName} assignments in the page, or set to the
 *               empty string when those are absent
 * @param song   song title; see {@code artist}
 * @return a {@code Lyrics} flagged {@code NO_RESULT} when the server answers with an HTTP error
 *         status, {@code ERROR} on any other I/O failure (including a redirect to a location
 *         not containing {@code azlyrics}), {@code NEGATIVE_RESULT} when the lyrics marker is
 *         not found, and {@code POSITIVE_RESULT} otherwise
 */
public static Lyrics fromURL(String url, String artist, String song) {
    String html;
    try {
        Document document = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        if (document.location().contains("azlyrics"))
            html = document.html();
        else
            throw new IOException("Redirected to wrong domain " + document.location());
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    Pattern p = Pattern.compile("Sorry about that. -->(.*)", Pattern.DOTALL);
    Matcher matcher = p.matcher(html);
    if (artist == null || song == null) {
        Pattern metaPattern = Pattern.compile("ArtistName = \"(.*)\";\r\nSongName = \"(.*)\";\r\n", Pattern.DOTALL);
        Matcher metaMatcher = metaPattern.matcher(html);
        if (metaMatcher.find()) {
            artist = metaMatcher.group(1);
            song = metaMatcher.group(2);
            // The greedy DOTALL group may run past the closing quote; cut there when it does.
            // Without the guard, a group containing no quote made substring(0, -1) throw.
            int closingQuote = song.indexOf('"');
            if (closingQuote >= 0)
                song = song.substring(0, closingQuote);
        } else
            artist = song = "";
    }
    if (matcher.find()) {
        Lyrics l = new Lyrics(Lyrics.POSITIVE_RESULT);
        l.setArtist(artist);
        String text = matcher.group(1);
        // Keep only the part before the first closing div; leave the text whole when the page
        // has none instead of failing on substring(0, -1).
        int divEnd = text.indexOf("</div>");
        if (divEnd >= 0)
            text = text.substring(0, divEnd);
        // Drop bracketed spans such as "[...]".
        text = text.replaceAll("\\[[^\\[]*\\]", "");
        l.setText(text);
        l.setTitle(song);
        l.setURL(url);
        l.setSource("AZLyrics");
        return l;
    } else
        return new Lyrics(Lyrics.NEGATIVE_RESULT);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document)

Example 3 with HttpStatusException

use of org.jsoup.HttpStatusException in project MusicDNA by harjot-oberai.

the class PLyrics method fromURL.

/**
 * Downloads a PLyrics page and extracts the lyrics with regular expressions.
 *
 * <p>The lyrics are the text between the {@code <!-- start of lyrics -->} and
 * {@code <!-- end of lyrics -->} comments, with bracketed spans removed.
 *
 * @param url    address of the lyrics page to fetch
 * @param artist artist name; when this or {@code song} is {@code null}, both are re-read from
 *               the {@code ArtistName}/{@code SongName} assignments in the page, or set to the
 *               empty string when those are absent
 * @param song   song title; see {@code artist}
 * @return a {@code Lyrics} flagged {@code NO_RESULT} when the server answers with an HTTP error
 *         status, {@code ERROR} on any other I/O failure (including a redirect to a location
 *         not containing {@code domain}), {@code NEGATIVE_RESULT} when the lyrics markers are
 *         not found, and {@code POSITIVE_RESULT} otherwise
 */
public static Lyrics fromURL(String url, String artist, String song) {
    String html;
    try {
        Document document = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        if (document.location().contains(domain))
            html = document.html();
        else
            throw new IOException("Redirected to wrong domain " + document.location());
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    Pattern p = Pattern.compile("<!-- start of lyrics -->(.*)<!-- end of lyrics -->", Pattern.DOTALL);
    Matcher matcher = p.matcher(html);
    if (artist == null || song == null) {
        Pattern metaPattern = Pattern.compile("ArtistName = \"(.*)\";\r\nSongName = \"(.*)\";\r\n", Pattern.DOTALL);
        Matcher metaMatcher = metaPattern.matcher(html);
        if (metaMatcher.find()) {
            artist = metaMatcher.group(1);
            song = metaMatcher.group(2);
            // The greedy DOTALL group may run past the closing quote; cut there when it does.
            // Without the guard, a group containing no quote made substring(0, -1) throw.
            int closingQuote = song.indexOf('"');
            if (closingQuote >= 0)
                song = song.substring(0, closingQuote);
        } else
            artist = song = "";
    }
    if (matcher.find()) {
        Lyrics l = new Lyrics(Lyrics.POSITIVE_RESULT);
        l.setArtist(artist);
        String text = matcher.group(1);
        // Drop bracketed spans such as "[...]".
        text = text.replaceAll("\\[[^\\[]*\\]", "");
        l.setText(text);
        l.setTitle(song);
        l.setURL(url);
        l.setSource("PLyrics");
        return l;
    } else
        return new Lyrics(Lyrics.NEGATIVE_RESULT);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document)

Example 4 with HttpStatusException

use of org.jsoup.HttpStatusException in project MusicDNA by harjot-oberai.

the class UrbanLyrics method fromURL.

/**
 * Downloads an UrbanLyrics page and extracts the lyrics with regular expressions.
 *
 * <p>The lyrics are the text between the {@code <!-- lyrics start -->} and
 * {@code <!-- lyrics end -->} comments, with bracketed spans removed.
 *
 * @param url    address of the lyrics page to fetch
 * @param artist artist name; when this or {@code song} is {@code null}, both are re-read from
 *               the {@code ArtistName}/{@code SongName} assignments in the page, or set to the
 *               empty string when those are absent
 * @param song   song title; see {@code artist}
 * @return a {@code Lyrics} flagged {@code NO_RESULT} when the server answers with an HTTP error
 *         status, {@code ERROR} on any other I/O failure (including a redirect to a location
 *         not containing {@code domain}), {@code NEGATIVE_RESULT} when the lyrics markers are
 *         not found, and {@code POSITIVE_RESULT} otherwise
 */
public static Lyrics fromURL(String url, String artist, String song) {
    String html;
    try {
        Document document = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        if (document.location().contains(domain))
            html = document.html();
        else
            throw new IOException("Redirected to wrong domain " + document.location());
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    Pattern p = Pattern.compile("<!-- lyrics start -->(.*)<!-- lyrics end -->", Pattern.DOTALL);
    Matcher matcher = p.matcher(html);
    if (artist == null || song == null) {
        Pattern metaPattern = Pattern.compile("ArtistName = \"(.*)\";\r\nSongName = \"(.*)\";\r\n", Pattern.DOTALL);
        Matcher metaMatcher = metaPattern.matcher(html);
        if (metaMatcher.find()) {
            artist = metaMatcher.group(1);
            song = metaMatcher.group(2);
            // The greedy DOTALL group may run past the closing quote; cut there when it does.
            // Without the guard, a group containing no quote made substring(0, -1) throw.
            int closingQuote = song.indexOf('"');
            if (closingQuote >= 0)
                song = song.substring(0, closingQuote);
        } else
            artist = song = "";
    }
    if (matcher.find()) {
        Lyrics l = new Lyrics(Lyrics.POSITIVE_RESULT);
        l.setArtist(artist);
        String text = matcher.group(1);
        // Drop bracketed spans such as "[...]".
        text = text.replaceAll("\\[[^\\[]*\\]", "");
        l.setText(text);
        l.setTitle(song);
        l.setURL(url);
        l.setSource("UrbanLyrics");
        return l;
    } else
        return new Lyrics(Lyrics.NEGATIVE_RESULT);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document)

Example 5 with HttpStatusException

use of org.jsoup.HttpStatusException in project MusicDNA by harjot-oberai.

the class LyricsMania method fromURL.

/**
 * Downloads a LyricsMania page and converts it into a {@link Lyrics} result.
 *
 * <p>The lyrics come from the first element with class {@code lyrics-body}, cleaned down to
 * jsoup's basic whitelist plus {@code div}, then sliced between the first {@code </strong>} and
 * the last {@code </div>}.
 *
 * @param url    address of the lyrics page to fetch
 * @param artist artist name; when {@code null}, read from the first link inside the first
 *               {@code lyrics-nav-menu} element
 * @param title  song title; when {@code null}, taken from the first comma-separated entry of
 *               the page's {@code keywords} meta tag
 * @return a {@code Lyrics} flagged {@code NO_RESULT} when the server answers with an HTTP error
 *         status or an expected page element is missing, {@code ERROR} on any other I/O
 *         failure, {@code NEGATIVE_RESULT} when the text starts with {@code Instrumental}, and
 *         {@code POSITIVE_RESULT} otherwise
 */
@Reflection
public static Lyrics fromURL(String url, String artist, String title) {
    String text;
    try {
        Document document = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        Element lyricsBody = document.getElementsByClass("lyrics-body").get(0);
        text = Jsoup.clean(lyricsBody.html(), "", Whitelist.basic().addTags("div"));
        // NOTE(review): "</strong>" is 9 characters, so +10 also skips the character after it
        // (presumably a line break - confirm). If the marker is absent, indexOf yields -1 and the
        // slice silently starts at offset 9.
        text = text.substring(text.indexOf("</strong>") + 10, text.lastIndexOf("</div>"));
        if (artist == null)
            artist = document.getElementsByClass("lyrics-nav-menu").get(0).getElementsByTag("a").get(0).text();
        if (title == null)
            // Elements.attr(key, value) is a setter that returns the same list, so the previous
            // getElementsByTag("meta").attr("name", "keywords") rewrote every meta tag and then
            // read the first one. Select the keywords meta tag explicitly, and only when needed.
            title = document.select("meta[name=keywords]").get(0).attr("content").split(",")[0];
    } catch (HttpStatusException | IndexOutOfBoundsException e) {
        // IndexOutOfBoundsException covers both get(0) on an empty element list and a failed substring.
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException e) {
        return new Lyrics(ERROR);
    }
    if (text.startsWith("Instrumental"))
        return new Lyrics(NEGATIVE_RESULT);
    Lyrics lyrics = new Lyrics(POSITIVE_RESULT);
    lyrics.setArtist(artist);
    lyrics.setTitle(title);
    lyrics.setURL(url);
    lyrics.setSource(domain);
    lyrics.setText(text.trim());
    return lyrics;
}
Also used : Element(org.jsoup.nodes.Element) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document) Reflection(com.sdsmdg.harjot.MusicDNA.annotations.Reflection)

Aggregations

IOException (java.io.IOException): 6 · HttpStatusException (org.jsoup.HttpStatusException): 6 · Document (org.jsoup.nodes.Document): 6 · Pattern (java.util.regex.Pattern): 4 · Matcher (java.util.regex.Matcher): 3 · Reflection (com.sdsmdg.harjot.MusicDNA.annotations.Reflection): 1 · Connection (org.jsoup.Connection): 1 · Element (org.jsoup.nodes.Element): 1 · Elements (org.jsoup.select.Elements): 1 · Test (org.junit.Test): 1