Examples with Document - org.jsoup.nodes.Document

Example 86 with Document

use of org.jsoup.nodes.Document in project blueocean-plugin by jenkinsci.

the class StatePreloaderTest method test.

/**
 * Verifies that the Blue Ocean activity page of a freestyle project embeds
 * prefetched state for both pipeline preloaders in its head scripts.
 */
@Test
public void test() throws IOException, ExecutionException, InterruptedException, SAXException {
    // A project with one finished build, so the activity page has something to show.
    FreeStyleProject project = j.createProject(FreeStyleProject.class, "freestyle");
    FreeStyleBuild build = project.scheduleBuild2(0).get();
    j.waitForCompletion(build);

    // Substrings the page's head scripts are expected to contain.
    String pipelineStateMarker = String.format("setState('prefetchdata.%s',", PipelineStatePreloader.class.getSimpleName());
    String activityStateMarker = String.format("setState('prefetchdata.%s',", PipelineActivityStatePreloader.class.getSimpleName());
    String activitiesRestUrl = "\"restUrl\":\"/blue/rest/organizations/jenkins/pipelines/freestyle/activities/?start=0&limit=26\"";

    // Request the activity page of that project and collect every <script> under <head>.
    String activityUrl = j.jenkins.getRootUrl() + BlueOceanWebURLBuilder.toBlueOceanURL(project) + "/activity/";
    Document page = Jsoup.connect(activityUrl).get();
    String headScripts = page.select("head script").toString();

    Assert.assertTrue(headScripts.contains(pipelineStateMarker));
    Assert.assertTrue(headScripts.contains(activityStateMarker));
    Assert.assertTrue(headScripts.contains(activitiesRestUrl));
}

Also used : FreeStyleBuild(hudson.model.FreeStyleBuild) FreeStyleProject(hudson.model.FreeStyleProject) Document(org.jsoup.nodes.Document) Test(org.junit.Test) BaseTest(io.jenkins.blueocean.service.embedded.BaseTest)

Example 87 with Document

use of org.jsoup.nodes.Document in project AozoraEpub3 by hmdev.

the class WebAozoraConverter method convertToAozoraText.

////////////////////////////////////////////////////////////////
/**
 * Runs the conversion: caches the page at {@code urlString} (and the chapter pages it
 * links to) under {@code cachePath}, then writes everything as one Aozora-format text
 * file named {@code converted.txt}.
 *
 * @param urlString URL of the list page (or of a single page that holds the content)
 * @param cachePath root directory for cached HTML files and the converted output
 * @param interval wait before each chapter download, in milliseconds (raised to at least 500)
 * @param modifiedExpire cache files modified within this many hours are treated as updated
 * @param convertUpdated output only when there is an update
 * @param convertModifiedOnly convert only added/updated chapters
 * @param convertModifiedTail convert only the updated chapters that are contiguous from the latest one
 * @param beforeChapter convert only the given number of chapters; 0 means not specified
 * @return the converted text file, or null when the conversion was skipped or canceled
 * @throws IOException if the output file cannot be written or a cached page cannot be parsed
 */
public File convertToAozoraText(String urlString, File cachePath, int interval, float modifiedExpire, boolean convertUpdated, boolean convertModifiedOnly, boolean convertModifiedTail, int beforeChapter) throws IOException {
    this.canceled = false;
    // Always treated as updated when no per-chapter date list can be obtained
    this.updated = true;
    this.interval = Math.max(500, interval);
    this.modifiedExpire = Math.max(0, modifiedExpire);
    this.convertUpdated = convertUpdated;
    this.convertModifiedOnly = convertModifiedOnly;
    this.convertModifiedTail = convertModifiedTail;
    this.beforeChapter = beforeChapter;
    // Probe the URL with a trailing "/" and adopt it when the server answers 200
    urlString = urlString.trim();
    if (!urlString.endsWith("/") && !urlString.endsWith(".html") && !urlString.endsWith(".htm") && urlString.indexOf("?") == -1) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(urlString + "/").openConnection();
            if (connection.getResponseCode() == 200) {
                urlString += "/";
                LogAppender.println("URL修正 : " + urlString);
            }
        } catch (Exception e) {
            // Best effort: when the probe fails the URL is used as given
        } finally {
            if (connection != null)
                connection.disconnect();
        }
    }
    this.urlString = urlString;
    // Everything before the first '/' that follows "//" (scheme + host)
    this.baseUri = urlString.substring(0, urlString.indexOf('/', urlString.indexOf("//") + 2));
    //String fqdn = baseUri.substring(baseUri.indexOf("//")+2);
    // Directory part of the list URL; relative links are resolved against it
    String listBaseUrl = urlString.substring(0, urlString.lastIndexOf('/') + 1);
    this.pageBaseUri = listBaseUrl;
    // Drop the leading "http://" part
    String urlFilePath = CharUtils.escapeUrlToFile(urlString.substring(urlString.indexOf("//") + 2));
    // (disabled) compare using the string without "http://"
    /*ExtractInfo[] extractInfos = this.queryMap.get(ExtractId.PAGE_REGEX);
		if(extractInfos != null) {
			if (!extractInfos[0].matches(urlString)) {
				LogAppender.println("読み込み可能なURLではありません");
				return null;
			}
		}*/
    String urlParentPath = urlFilePath;
    boolean isPath = false;
    if (urlFilePath.endsWith("/")) {
        isPath = true;
        urlFilePath += "index.html";
    } else
        urlParentPath = urlFilePath.substring(0, urlFilePath.lastIndexOf('/') + 1);
    // Conversion result location
    this.dstPath = cachePath.getAbsolutePath() + "/";
    if (isPath)
        this.dstPath += urlParentPath;
    else
        this.dstPath += urlFilePath + "_converted/";
    File txtFile = new File(this.dstPath + "converted.txt");
    // The cover image is saved under the same base name as the txt; the extension is
    // always png, which still displays, so it is left as is
    File coverImageFile = new File(this.dstPath + "converted.png");
    // Where the update information is stored
    File updateInfoFile = new File(this.dstPath + "update.txt");
    // If something other than a directory already exists at the output location, delete it
    File parentFile = txtFile.getParentFile();
    if (parentFile.exists() && !parentFile.isDirectory()) {
        parentFile.delete();
    }
    parentFile.mkdirs();
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(txtFile), "UTF-8"));
    try {
        // Cache the page at urlString
        File cacheFile = new File(cachePath.getAbsolutePath() + "/" + urlFilePath);
        try {
            LogAppender.append(urlString);
            cacheFile(urlString, cacheFile, null);
            LogAppender.println(" : List Loaded.");
        } catch (Exception e) {
            e.printStackTrace();
            LogAppender.println("一覧ページの取得に失敗しました。 ");
            // Without a previously cached copy there is nothing to convert
            if (!cacheFile.exists())
                return null;
            LogAppender.println("キャッシュファイルを利用します。");
        }
        // For a list page: cache each linked page, convert it to Aozora text and
        // output the pages joined with page breaks
        Document doc = Jsoup.parse(cacheFile, null);
        // Cover image
        Elements images = getExtractElements(doc, this.queryMap.get(ExtractId.COVER_IMG));
        if (images != null) {
            printImage(null, images.get(0), coverImageFile);
        }
        // Title (series and/or title; at least one is required)
        boolean hasTitle = false;
        String series = getExtractText(doc, this.queryMap.get(ExtractId.SERIES));
        if (series != null) {
            printText(bw, series);
            bw.append('\n');
            hasTitle = true;
        }
        String title = getExtractText(doc, this.queryMap.get(ExtractId.TITLE));
        if (title != null) {
            printText(bw, title);
            bw.append('\n');
            hasTitle = true;
        }
        if (!hasTitle) {
            LogAppender.println("SERIES/TITLE : タイトルがありません");
            return null;
        }
        // Author
        String author = getExtractText(doc, this.queryMap.get(ExtractId.AUTHOR));
        if (author != null) {
            printText(bw, author);
        }
        bw.append('\n');
        // Description
        Element description = getExtractFirstElement(doc, this.queryMap.get(ExtractId.DESCRIPTION));
        if (description != null) {
            bw.append('\n');
            bw.append("［＃区切り線］\n");
            bw.append('\n');
            bw.append("［＃ここから２字下げ］\n");
            bw.append("［＃ここから２字上げ］\n");
            printNode(bw, description, true);
            bw.append('\n');
            bw.append("［＃ここで字上げ終わり］\n");
            bw.append("［＃ここで字下げ終わり］\n");
            bw.append('\n');
            bw.append("［＃区切り線］\n");
            bw.append('\n');
        }
        String contentsUpdate = getExtractText(doc, this.queryMap.get(ExtractId.UPDATE));
        // Chapter (section) name; written out whenever it changes
        String preChapterTitle = "";
        // Full URLs of every chapter page
        Vector<String> chapterHrefs = new Vector<String>();
        Elements hrefs = getExtractElements(doc, this.queryMap.get(ExtractId.HREF));
        if (hrefs == null && this.queryMap.containsKey(ExtractId.HREF)) {
            LogAppender.println("HREF : 各話のリンク先URLが取得できません");
        }
        Vector<String> subtitles = getExtractStrings(doc, this.queryMap.get(ExtractId.SUBTITLE_LIST), true);
        if (subtitles == null && this.queryMap.containsKey(ExtractId.SUBTITLE_LIST)) {
            LogAppender.println("SUBTITLE_LIST : 各話タイトルが取得できません");
        }
        // Full URLs of the chapters that have no update.
        // null means no cache refresh; an empty set means everything is refreshed
        HashSet<String> noUpdateUrls = null;
        String[] postDateList = null;
        if (hrefs == null) {
            // Get the page count
            String pageNumString = getExtractText(doc, this.queryMap.get(ExtractId.PAGE_NUM));
            if (pageNumString == null && this.queryMap.containsKey(ExtractId.PAGE_NUM)) {
                LogAppender.println("PAGE_NUM : ページ数が取得できません");
            }
            int pageNum = -1;
            try {
                pageNum = Integer.parseInt(pageNumString);
            } catch (Exception e) {
                // Missing or non-numeric page count: pageNum stays -1
            }
            Element pageUrlElement = getExtractFirstElement(doc, this.queryMap.get(ExtractId.PAGE_URL));
            if (pageUrlElement == null && this.queryMap.containsKey(ExtractId.PAGE_URL)) {
                LogAppender.println("PAGE_URL : ページ番号用のURLが取得できません");
            }
            if (pageNum > 0 && pageUrlElement != null) {
                ExtractInfo pageUrlExtractInfo = this.queryMap.get(ExtractId.PAGE_URL)[0];
                // Generate the links for page 1 up to the page count
                for (int i = 1; i <= pageNum; i++) {
                    String pageUrl = pageUrlElement.attr("href");
                    if (pageUrl != null) {
                        pageUrl = pageUrlExtractInfo.replace(pageUrl + "\t" + i);
                        if (pageUrl != null) {
                            // Resolve non-absolute links against the host or the list directory
                            if (!pageUrl.startsWith("http")) {
                                if (pageUrl.charAt(0) == '/')
                                    pageUrl = baseUri + pageUrl;
                                else
                                    pageUrl = listBaseUrl + pageUrl;
                            }
                            chapterHrefs.add(pageUrl);
                        }
                    }
                }
            } else {
                Elements contentDivs = getExtractElements(doc, this.queryMap.get(ExtractId.CONTENT_ARTICLE));
                if (contentDivs != null) {
                    // No list links, but the page itself holds the body text
                    docToAozoraText(bw, doc, false, null, null);
                } else {
                    LogAppender.println("一覧のURLが取得できませんでした");
                    return null;
                }
            }
        } else {
            // To fetch only what changed, get the date-tag strings (innerHTML) that
            // correspond to each href and keep them
            Elements updates = getExtractElements(doc, this.queryMap.get(ExtractId.SUB_UPDATE));
            if (updates == null && this.queryMap.containsKey(ExtractId.SUB_UPDATE)) {
                LogAppender.println("SUB_UPDATE : 更新確認情報が取得できません");
            }
            if (updates != null) {
                // Used to check which URLs are not to be refreshed
                noUpdateUrls = createNoUpdateUrls(updateInfoFile, urlString, listBaseUrl, contentsUpdate, hrefs, updates);
            }
            // Collect every href in the list
            for (Element href : hrefs) {
                String hrefString = href.attr("href");
                if (hrefString == null || hrefString.length() == 0)
                    continue;
                // If a pattern is configured, the href has to match it
                ExtractInfo extractInfo = this.queryMap.get(ExtractId.HREF)[0];
                if (!extractInfo.hasPattern() || extractInfo.matches(hrefString)) {
                    String chapterHref = hrefString;
                    // Resolve non-absolute links against the host or the list directory
                    if (!hrefString.startsWith("http")) {
                        if (hrefString.charAt(0) == '/')
                            chapterHref = baseUri + hrefString;
                        else
                            chapterHref = listBaseUrl + hrefString;
                    }
                    chapterHrefs.add(chapterHref);
                }
            }
            postDateList = getPostDateList(doc, this.queryMap.get(ExtractId.CONTENT_UPDATE_LIST));
            if (postDateList == null && this.queryMap.containsKey(ExtractId.CONTENT_UPDATE_LIST)) {
                LogAppender.println("CONTENT_UPDATE_LIST : 一覧ページの更新日時情報が取得できません");
            }
        }
        if (chapterHrefs.size() > 0) {
            // Check across all chapters whether anything was updated or added
            updated = false;
            // Cut-off for "added/updated": files modified at or after this time count
            long expire = System.currentTimeMillis() - (long) (this.modifiedExpire * 3600000);
            // Used when only added/updated chapters are output
            HashSet<Integer> modifiedChapterIdx = null;
            // Index (0-based) of the last chapter that was not updated
            int lastNoModifiedChapterIdx = -1;
            if (this.convertModifiedOnly) {
                modifiedChapterIdx = new HashSet<Integer>();
            }
            int chapterIdx = 0;
            for (String chapterHref : chapterHrefs) {
                if (this.canceled)
                    return null;
                if (chapterHref != null && chapterHref.length() > 0) {
                    // Page path used when turning image src values into full paths
                    this.pageBaseUri = chapterHref;
                    if (!chapterHref.endsWith("/")) {
                        int idx = chapterHref.indexOf('/', 7);
                        if (idx > -1)
                            this.pageBaseUri = chapterHref.substring(0, idx);
                    }
                    // Fetch into the cache; a download is preceded by the configured wait
                    String chapterPath = CharUtils.escapeUrlToFile(chapterHref.substring(chapterHref.indexOf("//") + 2));
                    File chapterCacheFile = new File(cachePath.getAbsolutePath() + "/" + chapterPath + (chapterPath.endsWith("/") ? "index.html" : ""));
                    // Set once the file has actually been downloaded in this run
                    boolean loaded = false;
                    // Changed to true when the chapter is due for a refresh
                    boolean reload = false;
                    // Reload when the no-update set exists and does not contain this URL
                    if (noUpdateUrls != null && !noUpdateUrls.contains(chapterHref))
                        reload = true;
                    if (reload || !chapterCacheFile.exists()) {
                        LogAppender.append("[" + (chapterIdx + 1) + "/" + chapterHrefs.size() + "] " + chapterHref);
                        try {
                            try {
                                Thread.sleep(this.interval);
                            } catch (InterruptedException e) {
                                // Interrupted wait: proceed with the download right away
                            }
                            cacheFile(chapterHref, chapterCacheFile, urlString);
                            LogAppender.println(" : Loaded.");
                            // A downloaded file means there is an update
                            this.updated = true;
                            loaded = true;
                        } catch (Exception e) {
                            e.printStackTrace();
                            LogAppender.println("htmlファイルが取得できませんでした : " + chapterHref);
                        }
                    }
                    // A cached file modified within the configured period counts as updated
                    if (!loaded) {
                        if (this.modifiedExpire > 0 && (this.convertModifiedOnly || this.convertUpdated) && chapterCacheFile.lastModified() >= expire) {
                            LogAppender.append("[" + (chapterIdx + 1) + "/" + chapterHrefs.size() + "] " + chapterHref);
                            LogAppender.println(" : Modified.");
                            this.updated = true;
                        }
                    }
                    // Bookkeeping for "output only the updated chapters"
                    if (this.convertModifiedOnly) {
                        // Compare by the file's modification time
                        if (chapterCacheFile.lastModified() >= expire) {
                            modifiedChapterIdx.add(chapterIdx);
                        } else {
                            if (this.convertModifiedTail) {
                                // Drop chapters that are not contiguous from the latest one
                                modifiedChapterIdx.clear();
                            }
                            lastNoModifiedChapterIdx = chapterIdx;
                        }
                    }
                }
                chapterIdx++;
            }
            // Nothing updated: log it, and stop here when output is wanted only on update
            if (!this.updated) {
                LogAppender.append("「" + title + "」");
                LogAppender.println("の更新はありません");
                if (this.convertUpdated)
                    return null;
            }
            if (this.convertModifiedOnly) {
                // Add the chapters preceding the update (the set keeps duplicates out)
                if (this.beforeChapter > 0) {
                    int startIdx = Math.max(0, lastNoModifiedChapterIdx - this.beforeChapter + 1);
                    if (modifiedChapterIdx.size() == 0) {
                        // Nothing was added: take the last beforeChapter chapters
                        int idx = chapterHrefs.size() - 1;
                        for (int i = 0; i < this.beforeChapter; i++) {
                            modifiedChapterIdx.add(idx--);
                        }
                    } else {
                        // Something was added: include the chapters leading up to it
                        for (int i = startIdx; i <= lastNoModifiedChapterIdx; i++) {
                            modifiedChapterIdx.add(i);
                        }
                    }
                }
                if (modifiedChapterIdx.size() == 0) {
                    LogAppender.println("追加更新分はありません");
                    this.updated = false;
                    return null;
                }
            } else {
                // A number of latest chapters was specified
                if (this.beforeChapter > 0) {
                    int idx = chapterHrefs.size() - 1;
                    modifiedChapterIdx = new HashSet<Integer>();
                    for (int i = 0; i < this.beforeChapter; i++) {
                        modifiedChapterIdx.add(idx--);
                    }
                }
            }
            // Run the conversion (a null index set means every chapter is converted)
            chapterIdx = 0;
            for (String chapterHref : chapterHrefs) {
                if (this.canceled)
                    return null;
                if (modifiedChapterIdx == null || modifiedChapterIdx.contains(chapterIdx)) {
                    // Locate the cached file
                    String chapterPath = CharUtils.escapeUrlToFile(chapterHref.substring(chapterHref.indexOf("//") + 2));
                    File chapterCacheFile = new File(cachePath.getAbsolutePath() + "/" + chapterPath + (chapterPath.endsWith("/") ? "index.html" : ""));
                    // Output the chapter heading when it differs from the previous one
                    Document chapterDoc = Jsoup.parse(chapterCacheFile, null);
                    String chapterTitle = getExtractText(chapterDoc, this.queryMap.get(ExtractId.CONTENT_CHAPTER));
                    boolean newChapter = false;
                    if (chapterTitle != null && !preChapterTitle.equals(chapterTitle)) {
                        newChapter = true;
                        preChapterTitle = chapterTitle;
                        bw.append("\n［＃改ページ］\n");
                        bw.append("［＃ここから大見出し］\n");
                        printText(bw, preChapterTitle);
                        bw.append('\n');
                        bw.append("［＃ここで大見出し終わり］\n");
                        bw.append('\n');
                    }
                    // Take the post date from the list page
                    String postDate = null;
                    if (postDateList != null && postDateList.length > chapterIdx) {
                        postDate = postDateList[chapterIdx];
                    }
                    String subTitle = null;
                    if (subtitles != null && subtitles.size() > chapterIdx)
                        subTitle = subtitles.get(chapterIdx);
                    docToAozoraText(bw, chapterDoc, newChapter, subTitle, postDate);
                }
                chapterIdx++;
            }
            // Log which chapters were output, as 1-based numbers with runs collapsed ("1-3,6-7")
            if (modifiedChapterIdx != null) {
                StringBuilder buf = new StringBuilder();
                int preIdx = -1;
                boolean idxConnected = false;
                // Build the chapter-number list
                for (int idx = 0; idx < chapterHrefs.size(); idx++) {
                    if (modifiedChapterIdx.contains(idx)) {
                        if (buf.length() == 0)
                            buf.append((idx + 1));
                        else {
                            if (preIdx == idx - 1) {
                                idxConnected = true;
                            } else {
                                if (idxConnected)
                                    buf.append("-" + (preIdx + 1));
                                idxConnected = false;
                                // Start of a new run: 1-based like every other number in the list
                                buf.append("," + (idx + 1));
                            }
                        }
                        preIdx = idx;
                    }
                }
                if (idxConnected)
                    buf.append("-" + (preIdx + 1));
                LogAppender.println(buf + "話を変換します");
            }
        }
        // Append the source URL and conversion time as the colophon
        bw.append("\n［＃改ページ］\n");
        bw.append("底本： ");
        bw.append("<a href=\"");
        bw.append(urlString);
        bw.append("\">");
        bw.append(urlString);
        bw.append("</a>");
        bw.append('\n');
        bw.append("変換日時： ");
        bw.append(dateFormat.format(new Date()));
        bw.append('\n');
    } finally {
        bw.close();
    }
    this.canceled = false;
    return txtFile;
}

Also used : Element(org.jsoup.nodes.Element) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URL(java.net.URL) IOException(java.io.IOException) Date(java.util.Date) BufferedWriter(java.io.BufferedWriter) HttpURLConnection(java.net.HttpURLConnection) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) Vector(java.util.Vector)

Example 88 with Document

use of org.jsoup.nodes.Document in project Java-readability by basis-technology-corp.

the class NekoJsoupParser method parse.

/**
 * Parses HTML text with the NekoHTML SAX parser and returns a jsoup document
 * created for the given base URI.
 *
 * @param data    the HTML text to parse
 * @param baseUri base URI passed to the new {@link Document}
 * @return the document instance that was handed to the content handler
 * @throws SAXException if the parser reports a SAX error
 * @throws IOException  if reading the input fails
 */
public Document parse(String data, String baseUri) throws SAXException, IOException {
    // Feed the string to the parser as a character stream.
    InputSource input = new InputSource(new StringReader(data));
    SAXParser parser = new SAXParser();
    Document result = new Document(baseUri);
    // The handler receives the document up front; presumably it fills it from the SAX events.
    parser.setContentHandler(new Handler(result));
    parser.setErrorHandler(new LocalErrorHandler());
    parser.parse(input);
    return result;
}

Also used : InputSource(org.xml.sax.InputSource) StringReader(java.io.StringReader) SAXParser(org.cyberneko.html.parsers.SAXParser) DefaultHandler(org.xml.sax.helpers.DefaultHandler) ErrorHandler(org.xml.sax.ErrorHandler) Document(org.jsoup.nodes.Document)

Example 89 with Document

use of org.jsoup.nodes.Document in project MusicDNA by harjot-oberai.

the class Genius method fromURL.

/**
 * Downloads a Genius lyrics page and turns its {@code .lyrics} element into a
 * {@link Lyrics} object.
 *
 * @param url    address of the lyrics page
 * @param artist artist name; when null, both artist and title are read from the page
 * @param title  track title (replaced by the page's title when {@code artist} is null)
 * @return a NO_RESULT lyrics on an HTTP status error, an ERROR lyrics on other I/O
 *         failures or when the page has no {@code .lyrics} element, otherwise the
 *         populated result
 */
public static Lyrics fromURL(String url, String artist, String title) {
    Document page;
    String rawLyrics;
    try {
        page = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        Elements lyricsNodes = page.select(".lyrics");
        if (lyricsNodes.isEmpty()) {
            // A page without lyrics is routed to the ERROR branch below.
            throw new StringIndexOutOfBoundsException();
        }
        // Keep only <br> tags from the lyrics markup.
        String lyricsHtml = lyricsNodes.html();
        rawLyrics = Jsoup.clean(lyricsHtml, Whitelist.none().addTags("br")).trim();
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException | StringIndexOutOfBoundsException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }

    if (artist == null) {
        title = page.getElementsByClass("text_title").get(0).text();
        artist = page.getElementsByClass("text_artist").get(0).text();
    }

    Lyrics result = new Lyrics(Lyrics.POSITIVE_RESULT);
    if ("[Instrumental]".equals(rawLyrics)) {
        result = new Lyrics(Lyrics.NEGATIVE_RESULT);
    }

    // Lines that consist solely of a bracketed tag are left out of the text.
    Pattern bracketTag = Pattern.compile("\\[.+\\]");
    StringBuilder html = new StringBuilder();
    String[] lines = rawLyrics.split("<br> ");
    for (int i = 0; i < lines.length; i++) {
        String line = lines[i];
        String compact = line.replaceAll("\\s", "");
        if (bracketTag.matcher(compact).matches()) {
            continue;
        }
        // Skip blank lines until the first real line has been written.
        if (compact.isEmpty() && html.length() == 0) {
            continue;
        }
        html.append(line.replaceAll("\\P{Print}", ""));
        html.append("<br/>");
    }
    // Drop the trailing "<br/>" (5 characters).
    int htmlLength = html.length();
    if (htmlLength > 5) {
        html.setLength(htmlLength - 5);
    }

    result.setArtist(artist);
    result.setTitle(title);
    result.setText(Normalizer.normalize(html.toString(), Normalizer.Form.NFD));
    result.setURL(url);
    result.setSource("Genius");
    return result;
}

Also used : Pattern(java.util.regex.Pattern) HttpStatusException(org.jsoup.HttpStatusException) IOException(java.io.IOException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 90 with Document

use of org.jsoup.nodes.Document in project MusicDNA by harjot-oberai.

the class Genius method search.

/**
 * Queries the Genius search API and converts every hit into a SEARCH_ITEM
 * {@link Lyrics} entry carrying artist, title and song URL.
 *
 * @param query free-text search string; combining diacritical marks are removed before sending
 * @return the hits in response order; empty when the request fails, the response
 *         cannot be parsed, or the reported status is not 200
 */
public static ArrayList<Lyrics> search(String query) {
    ArrayList<Lyrics> found = new ArrayList<>();
    // Decompose accented characters, then strip the combining marks.
    String decomposed = Normalizer.normalize(query, Normalizer.Form.NFD);
    query = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

    JsonObject payload = null;
    try {
        String endpoint = String.format("http://api.genius.com/search?q=%s", URLEncoder.encode(query, "UTF-8"));
        URL queryURL = new URL(endpoint);
        Connection request = Jsoup.connect(queryURL.toExternalForm())
                .header("Authorization", "Bearer " + Config.GENIUS)
                .timeout(0)
                .ignoreContentType(true);
        Document body = request.userAgent(Net.USER_AGENT).get();
        payload = new JsonParser().parse(body.text()).getAsJsonObject();
    } catch (JsonSyntaxException | IOException e) {
        e.printStackTrace();
    }

    if (payload == null) {
        return found;
    }
    int status = payload.getAsJsonObject("meta").get("status").getAsInt();
    if (status != 200) {
        return found;
    }

    JsonArray hits = payload.getAsJsonObject("response").getAsJsonArray("hits");
    for (int i = 0; i < hits.size(); i++) {
        JsonObject song = hits.get(i).getAsJsonObject().getAsJsonObject("result");
        String artistName = song.getAsJsonObject("primary_artist").get("name").getAsString();
        String songTitle = song.get("title").getAsString();
        String songUrl = "http://genius.com/songs/" + song.get("id").getAsString();

        Lyrics item = new Lyrics(Lyrics.SEARCH_ITEM);
        item.setArtist(artistName);
        item.setTitle(songTitle);
        item.setURL(songUrl);
        item.setSource("Genius");
        found.add(item);
    }
    return found;
}

Also used : ArrayList(java.util.ArrayList) Connection(org.jsoup.Connection) JsonObject(com.google.gson.JsonObject) IOException(java.io.IOException) Document(org.jsoup.nodes.Document) URL(java.net.URL) JsonArray(com.google.gson.JsonArray) JsonSyntaxException(com.google.gson.JsonSyntaxException) JsonParser(com.google.gson.JsonParser)

Aggregations

Document (org.jsoup.nodes.Document)391 Test (org.junit.Test)194 Element (org.jsoup.nodes.Element)153 IOException (java.io.IOException)100 File (java.io.File)81 Elements (org.jsoup.select.Elements)70 ElementHandlerImpl (org.asqatasun.ruleimplementation.ElementHandlerImpl)51 Connection (org.jsoup.Connection)37 ArrayList (java.util.ArrayList)36 URL (java.net.URL)24 HashMap (java.util.HashMap)16 InputStream (java.io.InputStream)13 List (java.util.List)9 MalformedURLException (java.net.MalformedURLException)8 Matcher (java.util.regex.Matcher)7 Logger (org.slf4j.Logger)7 Pattern (java.util.regex.Pattern)6 HttpGet (org.apache.http.client.methods.HttpGet)6 Jsoup (org.jsoup.Jsoup)6 LoggerFactory (org.slf4j.LoggerFactory)6