use of org.jsoup.nodes.Document in project blueocean-plugin by jenkinsci.
the class StatePreloaderTest method test.
@Test
public void test() throws IOException, ExecutionException, InterruptedException, SAXException {
    // Set up a freestyle project and let one build run to completion.
    FreeStyleProject freestyleProject = j.createProject(FreeStyleProject.class, "freestyle");
    FreeStyleBuild run = freestyleProject.scheduleBuild2(0).get();
    j.waitForCompletion(run);

    // Fetch the Blue Ocean activity page of that project; its <head> scripts are
    // expected to carry the prefetched pipeline details and the activity (runs) request.
    String activityUrl = j.jenkins.getRootUrl() + BlueOceanWebURLBuilder.toBlueOceanURL(freestyleProject) + "/activity/";
    Document activityPage = Jsoup.connect(activityUrl).get();
    String headScripts = activityPage.select("head script").toString();

    // Each preloader must have emitted its own setState(...) call.
    Class<?>[] expectedPreloaders = { PipelineStatePreloader.class, PipelineActivityStatePreloader.class };
    for (Class<?> preloader : expectedPreloaders) {
        String stateCall = String.format("setState('prefetchdata.%s',", preloader.getSimpleName());
        Assert.assertTrue(headScripts.contains(stateCall));
    }

    // The activity preloader must reference the first page of the project's activities.
    String expectedRestUrl = "\"restUrl\":\"/blue/rest/organizations/jenkins/pipelines/freestyle/activities/?start=0&limit=26\"";
    Assert.assertTrue(headScripts.contains(expectedRestUrl));
}
use of org.jsoup.nodes.Document in project AozoraEpub3 by hmdev.
the class WebAozoraConverter method convertToAozoraText.
////////////////////////////////////////////////////////////////
/**
 * Runs the conversion: caches the list page at {@code urlString} and the chapter pages it
 * links to, then writes them out as a single Aozora-format text file ({@code converted.txt})
 * under {@code cachePath}.
 *
 * @param urlString URL of the list page (or of a single content page)
 * @param cachePath root directory for cached HTML and for the converted output
 * @param interval wait in milliseconds before each chapter download; raised to at least 500
 * @param modifiedExpire cache files modified within this many hours are treated as updated;
 *        negative values are clamped to 0
 * @param convertUpdated when true, return null instead of converting if nothing was updated
 * @param convertModifiedOnly convert only the added/updated chapters
 * @param convertModifiedTail convert only chapters that are contiguous from the newest one
 * @param beforeChapter number of chapters to convert; 0 means not specified
 * @return the converted text file, or null when the conversion was skipped or canceled
 * @throws IOException if the output file or a cached page cannot be read or written
 */
public File convertToAozoraText(String urlString, File cachePath, int interval, float modifiedExpire, boolean convertUpdated, boolean convertModifiedOnly, boolean convertModifiedTail, int beforeChapter) throws IOException {
    this.canceled = false;
    // Always treated as updated when the date list cannot be obtained
    this.updated = true;
    this.interval = Math.max(500, interval);
    this.modifiedExpire = Math.max(0, modifiedExpire);
    this.convertUpdated = convertUpdated;
    this.convertModifiedOnly = convertModifiedOnly;
    this.convertModifiedTail = convertModifiedTail;
    this.beforeChapter = beforeChapter;
    // Probe the URL with a trailing "/" appended and adopt it when the server answers 200
    urlString = urlString.trim();
    if (!urlString.endsWith("/") && !urlString.endsWith(".html") && !urlString.endsWith(".htm") && urlString.indexOf("?") == -1) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) new URL(urlString + "/").openConnection();
            if (connection.getResponseCode() == 200) {
                urlString += "/";
                LogAppender.println("URL修正 : " + urlString);
            }
        } catch (Exception e) {
            // Best-effort probe: on any failure the URL is used as given
        } finally {
            if (connection != null)
                connection.disconnect();
        }
    }
    this.urlString = urlString;
    // scheme + host, i.e. everything before the first "/" after "//"
    this.baseUri = urlString.substring(0, urlString.indexOf('/', urlString.indexOf("//") + 2));
    //String fqdn = baseUri.substring(baseUri.indexOf("//")+2);
    String listBaseUrl = urlString.substring(0, urlString.lastIndexOf('/') + 1);
    this.pageBaseUri = listBaseUrl;
    // Strip the scheme ("http://") before turning the URL into a cache file path
    String urlFilePath = CharUtils.escapeUrlToFile(urlString.substring(urlString.indexOf("//") + 2));
    // Disabled check, kept for reference: compare against the URL with the scheme stripped
    /*ExtractInfo[] extractInfos = this.queryMap.get(ExtractId.PAGE_REGEX);
    if(extractInfos != null) {
    if (!extractInfos[0].matches(urlString)) {
    LogAppender.println("読み込み可能なURLではありません");
    return null;
    }
    }*/
    String urlParentPath = urlFilePath;
    boolean isPath = false;
    if (urlFilePath.endsWith("/")) {
        isPath = true;
        urlFilePath += "index.html";
    } else
        urlParentPath = urlFilePath.substring(0, urlFilePath.lastIndexOf('/') + 1);
    // Conversion output directory
    this.dstPath = cachePath.getAbsolutePath() + "/";
    if (isPath)
        this.dstPath += urlParentPath;
    else
        this.dstPath += urlFilePath + "_converted/";
    File txtFile = new File(this.dstPath + "converted.txt");
    // The cover image is saved with the same base name as the txt; the extension is always
    // png, but it can still be displayed, so it is left as is
    File coverImageFile = new File(this.dstPath + "converted.png");
    // Where the update information is stored
    File updateInfoFile = new File(this.dstPath + "update.txt");
    // If something other than a directory already exists at the output location, delete it
    File parentFile = txtFile.getParentFile();
    if (parentFile.exists() && !parentFile.isDirectory()) {
        parentFile.delete();
    }
    parentFile.mkdirs();
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(txtFile), "UTF-8"));
    try {
        // Cache the page at urlString
        File cacheFile = new File(cachePath.getAbsolutePath() + "/" + urlFilePath);
        try {
            LogAppender.append(urlString);
            cacheFile(urlString, cacheFile, null);
            LogAppender.println(" : List Loaded.");
        } catch (Exception e) {
            e.printStackTrace();
            LogAppender.println("一覧ページの取得に失敗しました。 ");
            // Fall back to a previously cached copy if there is one
            if (!cacheFile.exists())
                return null;
            LogAppender.println("キャッシュファイルを利用します。");
        }
        // For a list page: cache the linked pages, convert each to Aozora text and
        // join them with page breaks
        Document doc = Jsoup.parse(cacheFile, null);
        // Cover image
        Elements images = getExtractElements(doc, this.queryMap.get(ExtractId.COVER_IMG));
        if (images != null) {
            printImage(null, images.get(0), coverImageFile);
        }
        // Title
        boolean hasTitle = false;
        String series = getExtractText(doc, this.queryMap.get(ExtractId.SERIES));
        if (series != null) {
            printText(bw, series);
            bw.append('\n');
            hasTitle = true;
        }
        String title = getExtractText(doc, this.queryMap.get(ExtractId.TITLE));
        if (title != null) {
            printText(bw, title);
            bw.append('\n');
            hasTitle = true;
        }
        if (!hasTitle) {
            LogAppender.println("SERIES/TITLE : タイトルがありません");
            return null;
        }
        // Author
        String author = getExtractText(doc, this.queryMap.get(ExtractId.AUTHOR));
        if (author != null) {
            printText(bw, author);
        }
        bw.append('\n');
        // Description
        Element description = getExtractFirstElement(doc, this.queryMap.get(ExtractId.DESCRIPTION));
        if (description != null) {
            bw.append('\n');
            bw.append("[#区切り線]\n");
            bw.append('\n');
            bw.append("[#ここから2字下げ]\n");
            bw.append("[#ここから2字上げ]\n");
            printNode(bw, description, true);
            bw.append('\n');
            bw.append("[#ここで字上げ終わり]\n");
            bw.append("[#ここで字下げ終わり]\n");
            bw.append('\n');
            bw.append("[#区切り線]\n");
            bw.append('\n');
        }
        String contentsUpdate = getExtractText(doc, this.queryMap.get(ExtractId.UPDATE));
        // Chapter heading; written out whenever it changes
        String preChapterTitle = "";
        // URLs (absolute) of each episode
        Vector<String> chapterHrefs = new Vector<String>();
        Elements hrefs = getExtractElements(doc, this.queryMap.get(ExtractId.HREF));
        if (hrefs == null && this.queryMap.containsKey(ExtractId.HREF)) {
            LogAppender.println("HREF : 各話のリンク先URLが取得できません");
        }
        Vector<String> subtitles = getExtractStrings(doc, this.queryMap.get(ExtractId.SUBTITLE_LIST), true);
        if (subtitles == null && this.queryMap.containsKey(ExtractId.SUBTITLE_LIST)) {
            LogAppender.println("SUBTITLE_LIST : 各話タイトルが取得できません");
        }
        // URLs (absolute) of episodes that have not been updated.
        // null means no cache refresh; an empty set means every episode is refreshed
        HashSet<String> noUpdateUrls = null;
        String[] postDateList = null;
        if (hrefs == null) {
            // Get the page count
            String pageNumString = getExtractText(doc, this.queryMap.get(ExtractId.PAGE_NUM));
            if (pageNumString == null && this.queryMap.containsKey(ExtractId.PAGE_NUM)) {
                LogAppender.println("PAGE_NUM : ページ数が取得できません");
            }
            int pageNum = -1;
            try {
                pageNum = Integer.parseInt(pageNumString);
            } catch (Exception e) {
                // Missing or non-numeric page count: pageNum stays -1
            }
            Element pageUrlElement = getExtractFirstElement(doc, this.queryMap.get(ExtractId.PAGE_URL));
            if (pageUrlElement == null && this.queryMap.containsKey(ExtractId.PAGE_URL)) {
                LogAppender.println("PAGE_URL : ページ番号用のURLが取得できません");
            }
            if (pageNum > 0 && pageUrlElement != null) {
                ExtractInfo pageUrlExtractInfo = this.queryMap.get(ExtractId.PAGE_URL)[0];
                // Generate one link per page, from 1 up to the page count
                for (int i = 1; i <= pageNum; i++) {
                    String pageUrl = pageUrlElement.attr("href");
                    if (pageUrl != null) {
                        pageUrl = pageUrlExtractInfo.replace(pageUrl + "\t" + i);
                        if (pageUrl != null) {
                            if (!pageUrl.startsWith("http")) {
                                if (pageUrl.charAt(0) == '/')
                                    pageUrl = baseUri + pageUrl;
                                else
                                    pageUrl = listBaseUrl + pageUrl;
                            }
                            chapterHrefs.add(pageUrl);
                        }
                    }
                }
            } else {
                Elements contentDivs = getExtractElements(doc, this.queryMap.get(ExtractId.CONTENT_ARTICLE));
                if (contentDivs != null) {
                    // No list links, but the page itself has body content
                    docToAozoraText(bw, doc, false, null, null);
                } else {
                    LogAppender.println("一覧のURLが取得できませんでした");
                    return null;
                }
            }
        } else {
            // To fetch only updated episodes, get the date-tag string (innerHTML) that
            // corresponds to each href and keep it
            Elements updates = getExtractElements(doc, this.queryMap.get(ExtractId.SUB_UPDATE));
            if (updates == null && this.queryMap.containsKey(ExtractId.SUB_UPDATE)) {
                LogAppender.println("SUB_UPDATE : 更新確認情報が取得できません");
            }
            if (updates != null) {
                // Used to check which URLs do not need refreshing
                noUpdateUrls = createNoUpdateUrls(updateInfoFile, urlString, listBaseUrl, contentsUpdate, hrefs, updates);
            }
            // Collect every href of the list
            for (Element href : hrefs) {
                String hrefString = href.attr("href");
                if (hrefString == null || hrefString.length() == 0)
                    continue;
                // Apply the pattern, if one is configured
                ExtractInfo extractInfo = this.queryMap.get(ExtractId.HREF)[0];
                if (!extractInfo.hasPattern() || extractInfo.matches(hrefString)) {
                    String chapterHref = hrefString;
                    if (!hrefString.startsWith("http")) {
                        if (hrefString.charAt(0) == '/')
                            chapterHref = baseUri + hrefString;
                        else
                            chapterHref = listBaseUrl + hrefString;
                    }
                    chapterHrefs.add(chapterHref);
                }
            }
            postDateList = getPostDateList(doc, this.queryMap.get(ExtractId.CONTENT_UPDATE_LIST));
            if (postDateList == null && this.queryMap.containsKey(ExtractId.CONTENT_UPDATE_LIST)) {
                LogAppender.println("CONTENT_UPDATE_LIST : 一覧ページの更新日時情報が取得できません");
            }
        }
        if (chapterHrefs.size() > 0) {
            // Check all episodes for updates or additions
            updated = false;
            // Threshold for "added/updated": cache files at or after this time count as updated
            long expire = System.currentTimeMillis() - (long) (this.modifiedExpire * 3600000);
            // Used when only added/updated episodes are output
            HashSet<Integer> modifiedChapterIdx = null;
            // Index (0-based) of the last episode that was not modified
            int lastNoModifiedChapterIdx = -1;
            if (this.convertModifiedOnly) {
                modifiedChapterIdx = new HashSet<Integer>();
            }
            int chapterIdx = 0;
            for (String chapterHref : chapterHrefs) {
                if (this.canceled)
                    return null;
                if (chapterHref != null && chapterHref.length() > 0) {
                    // Page base used when turning image src values into absolute URLs
                    this.pageBaseUri = chapterHref;
                    if (!chapterHref.endsWith("/")) {
                        // Start searching after the "//" of the scheme so that both
                        // "http://" and "https://" yield scheme + host (a fixed offset
                        // of 7 cut "https://host/..." down to "https:/")
                        int idx = chapterHref.indexOf('/', chapterHref.indexOf("//") + 2);
                        if (idx > -1)
                            this.pageBaseUri = chapterHref.substring(0, idx);
                    }
                    // Locate the cache file; a download is preceded by a wait of this.interval ms
                    String chapterPath = CharUtils.escapeUrlToFile(chapterHref.substring(chapterHref.indexOf("//") + 2));
                    File chapterCacheFile = new File(cachePath.getAbsolutePath() + "/" + chapterPath + (chapterPath.endsWith("/") ? "index.html" : ""));
                    // Set when the file was downloaded in this run
                    boolean loaded = false;
                    // Set to true when this episode must be re-downloaded
                    boolean reload = false;
                    // Reload when the no-update set exists and does not contain this URL
                    if (noUpdateUrls != null && !noUpdateUrls.contains(chapterHref))
                        reload = true;
                    if (reload || !chapterCacheFile.exists()) {
                        LogAppender.append("[" + (chapterIdx + 1) + "/" + chapterHrefs.size() + "] " + chapterHref);
                        try {
                            try {
                                Thread.sleep(this.interval);
                            } catch (InterruptedException e) {
                                // Interruption only shortens the wait; the download proceeds
                            }
                            cacheFile(chapterHref, chapterCacheFile, urlString);
                            LogAppender.println(" : Loaded.");
                            // A downloaded file counts as an update
                            this.updated = true;
                            loaded = true;
                        } catch (Exception e) {
                            e.printStackTrace();
                            LogAppender.println("htmlファイルが取得できませんでした : " + chapterHref);
                        }
                    }
                    // A cached file modified within the configured window counts as updated
                    if (!loaded) {
                        if (this.modifiedExpire > 0 && (this.convertModifiedOnly || this.convertUpdated) && chapterCacheFile.lastModified() >= expire) {
                            LogAppender.append("[" + (chapterIdx + 1) + "/" + chapterHrefs.size() + "] " + chapterHref);
                            LogAppender.println(" : Modified.");
                            this.updated = true;
                        }
                    }
                    // Bookkeeping for "output only updated episodes"
                    if (this.convertModifiedOnly) {
                        // Compare by the cache file's modification time
                        if (chapterCacheFile.lastModified() >= expire) {
                            modifiedChapterIdx.add(chapterIdx);
                        } else {
                            if (this.convertModifiedTail) {
                                // Drop episodes that are not contiguous from the newest one
                                modifiedChapterIdx.clear();
                            }
                            lastNoModifiedChapterIdx = chapterIdx;
                        }
                    }
                }
                chapterIdx++;
            }
            // Stop here when nothing was updated and only updated output was requested
            if (!this.updated) {
                LogAppender.append("「" + title + "」");
                LogAppender.println("の更新はありません");
                if (this.convertUpdated)
                    return null;
            }
            if (this.convertModifiedOnly) {
                // Add the episodes preceding the update; the set removes duplicates
                if (this.beforeChapter > 0) {
                    int startIdx = Math.max(0, lastNoModifiedChapterIdx - this.beforeChapter + 1);
                    if (modifiedChapterIdx.size() == 0) {
                        // Nothing was added: take the last beforeChapter episodes
                        int idx = chapterHrefs.size() - 1;
                        for (int i = 0; i < this.beforeChapter; i++) {
                            modifiedChapterIdx.add(idx--);
                        }
                    } else {
                        // Something was added: include the episodes just before it
                        for (int i = startIdx; i <= lastNoModifiedChapterIdx; i++) {
                            modifiedChapterIdx.add(i);
                        }
                    }
                }
                if (modifiedChapterIdx.size() == 0) {
                    LogAppender.println("追加更新分はありません");
                    this.updated = false;
                    return null;
                }
            } else {
                // Only the latest beforeChapter episodes were requested
                if (this.beforeChapter > 0) {
                    int idx = chapterHrefs.size() - 1;
                    modifiedChapterIdx = new HashSet<Integer>();
                    for (int i = 0; i < this.beforeChapter; i++) {
                        modifiedChapterIdx.add(idx--);
                    }
                }
            }
            // Run the conversion
            chapterIdx = 0;
            for (String chapterHref : chapterHrefs) {
                if (this.canceled)
                    return null;
                // A null index set means every episode is converted
                if (modifiedChapterIdx == null || modifiedChapterIdx.contains(chapterIdx)) {
                    // Locate the cache file
                    String chapterPath = CharUtils.escapeUrlToFile(chapterHref.substring(chapterHref.indexOf("//") + 2));
                    File chapterCacheFile = new File(cachePath.getAbsolutePath() + "/" + chapterPath + (chapterPath.endsWith("/") ? "index.html" : ""));
                    // Write the chapter heading when it differs from the previous one
                    Document chapterDoc = Jsoup.parse(chapterCacheFile, null);
                    String chapterTitle = getExtractText(chapterDoc, this.queryMap.get(ExtractId.CONTENT_CHAPTER));
                    boolean newChapter = false;
                    if (chapterTitle != null && !preChapterTitle.equals(chapterTitle)) {
                        newChapter = true;
                        preChapterTitle = chapterTitle;
                        bw.append("\n[#改ページ]\n");
                        bw.append("[#ここから大見出し]\n");
                        printText(bw, preChapterTitle);
                        bw.append('\n');
                        bw.append("[#ここで大見出し終わり]\n");
                        bw.append('\n');
                    }
                    // Take the post date from the list page
                    String postDate = null;
                    if (postDateList != null && postDateList.length > chapterIdx) {
                        postDate = postDateList[chapterIdx];
                    }
                    String subTitle = null;
                    if (subtitles != null && subtitles.size() > chapterIdx)
                        subTitle = subtitles.get(chapterIdx);
                    docToAozoraText(bw, chapterDoc, newChapter, subTitle, postDate);
                }
                chapterIdx++;
            }
            // Log which episode numbers were output
            if (modifiedChapterIdx != null) {
                StringBuilder buf = new StringBuilder();
                int preIdx = -1;
                boolean idxConnected = false;
                // Build a 1-based summary such as "3-5,8"
                for (int idx = 0; idx < chapterHrefs.size(); idx++) {
                    if (modifiedChapterIdx.contains(idx)) {
                        if (buf.length() == 0)
                            buf.append((idx + 1));
                        else {
                            if (preIdx == idx - 1) {
                                idxConnected = true;
                            } else {
                                if (idxConnected)
                                    buf.append("-" + (preIdx + 1));
                                idxConnected = false;
                                // 1-based like every other number in this summary
                                buf.append("," + (idx + 1));
                            }
                        }
                        preIdx = idx;
                    }
                }
                if (idxConnected)
                    buf.append("-" + (preIdx + 1));
                LogAppender.println(buf + "話を変換します");
            }
        }
        // Append the source URL as the original-text ("底本") note
        bw.append("\n[#改ページ]\n");
        bw.append("底本: ");
        bw.append("<a href=\"");
        bw.append(urlString);
        bw.append("\">");
        bw.append(urlString);
        bw.append("</a>");
        bw.append('\n');
        bw.append("変換日時: ");
        bw.append(dateFormat.format(new Date()));
        bw.append('\n');
    } finally {
        bw.close();
    }
    this.canceled = false;
    return txtFile;
}
use of org.jsoup.nodes.Document in project Java-readability by basis-technology-corp.
the class NekoJsoupParser method parse.
/**
 * Parses {@code data} with a NekoHTML SAX parser and returns a jsoup {@link Document}
 * created for {@code baseUri}.
 *
 * @param data the markup to parse
 * @param baseUri base URI passed to the new document
 * @return the document that was handed to the content handler
 * @throws SAXException if the SAX parser reports a parse failure
 * @throws IOException if reading the input fails
 */
public Document parse(String data, String baseUri) throws SAXException, IOException {
    InputSource input = new InputSource(new StringReader(data));
    SAXParser parser = new SAXParser();
    Document result = new Document(baseUri);
    // The handler receives the SAX events and is given the document to work on
    parser.setContentHandler(new Handler(result));
    parser.setErrorHandler(new LocalErrorHandler());
    parser.parse(input);
    return result;
}
use of org.jsoup.nodes.Document in project MusicDNA by harjot-oberai.
the class Genius method fromURL.
/**
 * Downloads the lyrics page at {@code url} and extracts the lyrics text from its
 * {@code .lyrics} element.
 *
 * @param url address of the lyrics page
 * @param artist artist name; when null, title and artist are read from the page's
 *        {@code text_title} / {@code text_artist} elements if those exist
 * @param title song title, used as given when {@code artist} is not null
 * @return a {@code NO_RESULT} lyrics object on an HTTP status error, an {@code ERROR}
 *         one on other I/O failures or when the page has no {@code .lyrics} element,
 *         otherwise the populated result
 */
public static Lyrics fromURL(String url, String artist, String title) {
    Document lyricsPage;
    String text;
    try {
        lyricsPage = Jsoup.connect(url).userAgent(Net.USER_AGENT).get();
        Elements lyricsDiv = lyricsPage.select(".lyrics");
        if (lyricsDiv.isEmpty())
            // Routed to the shared error handling below
            throw new StringIndexOutOfBoundsException();
        else
            // Keep only <br> tags so line breaks survive the clean-up
            text = Jsoup.clean(lyricsDiv.html(), Whitelist.none().addTags("br")).trim();
    } catch (HttpStatusException e) {
        return new Lyrics(Lyrics.NO_RESULT);
    } catch (IOException | StringIndexOutOfBoundsException e) {
        e.printStackTrace();
        return new Lyrics(Lyrics.ERROR);
    }
    if (artist == null) {
        // The page may lack these elements; only overwrite when they are present
        // instead of failing with IndexOutOfBoundsException on get(0)
        Elements titleElements = lyricsPage.getElementsByClass("text_title");
        if (!titleElements.isEmpty())
            title = titleElements.get(0).text();
        Elements artistElements = lyricsPage.getElementsByClass("text_artist");
        if (!artistElements.isEmpty())
            artist = artistElements.get(0).text();
    }
    Lyrics result = new Lyrics(Lyrics.POSITIVE_RESULT);
    if ("[Instrumental]".equals(text))
        result = new Lyrics(Lyrics.NEGATIVE_RESULT);
    // Lines consisting solely of a bracketed tag (e.g. "[Chorus]") are dropped
    Pattern pattern = Pattern.compile("\\[.+\\]");
    StringBuilder builder = new StringBuilder();
    for (String line : text.split("<br> ")) {
        String strippedLine = line.replaceAll("\\s", "");
        // Skip bracketed tag lines and leading blank lines
        if (!pattern.matcher(strippedLine).matches() && !(strippedLine.isEmpty() && builder.length() == 0))
            builder.append(line.replaceAll("\\P{Print}", "")).append("<br/>");
    }
    // Remove the trailing "<br/>" (5 characters)
    if (builder.length() > 5)
        builder.delete(builder.length() - 5, builder.length());
    result.setArtist(artist);
    result.setTitle(title);
    result.setText(Normalizer.normalize(builder.toString(), Normalizer.Form.NFD));
    result.setURL(url);
    result.setSource("Genius");
    return result;
}
use of org.jsoup.nodes.Document in project MusicDNA by harjot-oberai.
the class Genius method search.
/**
 * Queries the Genius search API and converts each hit into a {@code SEARCH_ITEM} lyrics entry.
 *
 * @param query search text; combining diacritical marks are stripped before sending
 * @return the hits as lyrics entries carrying artist, title, URL and source; empty when
 *         the request failed, the reply could not be parsed, or the reported status is not 200
 */
public static ArrayList<Lyrics> search(String query) {
    ArrayList<Lyrics> results = new ArrayList<>();
    // Decompose accented characters, then drop the combining marks
    query = Normalizer.normalize(query, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
    JsonObject response = null;
    try {
        URL queryURL = new URL(String.format("http://api.genius.com/search?q=%s", URLEncoder.encode(query, "UTF-8")));
        Document document = Jsoup.connect(queryURL.toExternalForm())
                .header("Authorization", "Bearer " + Config.GENIUS)
                .timeout(0)
                .ignoreContentType(true)
                .userAgent(Net.USER_AGENT)
                .get();
        response = new JsonParser().parse(document.text()).getAsJsonObject();
    } catch (JsonSyntaxException | IOException e) {
        e.printStackTrace();
    }
    // No reply, or a reply whose meta status is anything but 200: nothing to report
    boolean succeeded = response != null && response.getAsJsonObject("meta").get("status").getAsInt() == 200;
    if (!succeeded) {
        return results;
    }
    JsonArray hits = response.getAsJsonObject("response").getAsJsonArray("hits");
    for (int i = 0; i < hits.size(); i++) {
        JsonObject song = hits.get(i).getAsJsonObject().getAsJsonObject("result");
        String artistName = song.getAsJsonObject("primary_artist").get("name").getAsString();
        String songTitle = song.get("title").getAsString();
        String songUrl = "http://genius.com/songs/" + song.get("id").getAsString();
        Lyrics item = new Lyrics(Lyrics.SEARCH_ITEM);
        item.setArtist(artistName);
        item.setTitle(songTitle);
        item.setURL(songUrl);
        item.setSource("Genius");
        results.add(item);
    }
    return results;
}
Aggregations