use of com.bc.pmpheep.general.runnable.WechatArticle in project pmph by BCSquad.
the class WechatArticleServiceTest method getArticle.
/**
 * Starts a crawl for a fixed WeChat article link, polls the shared article map until the
 * entry for the returned id shows up, and logs the crawl status and fetched content.
 *
 * @throws Exception if the entry has not appeared after five two-second waits
 */
@Test
public void getArticle() throws Exception {
    String url = "https://mp.weixin.qq.com/s?__biz=MzA4NTQwNDcyMA==&mid=2650661978&idx=1&sn=2f5329f5b2bfda7050822cc5e3a4f03f&scene=21#wechat_redirect";
    String guid = WechatArticleService.runCrawler(url);
    logger.info("公众号文章id={}", guid);
    // Poll every two seconds; give up right after the fifth wait.
    for (int waits = 1; !Const.WACT_MAP.containsKey(guid); waits++) {
        Thread.sleep(2000);
        if (waits > 4) {
            throw new Exception("可能是网络原因导致的公众号文章爬取失败");
        }
    }
    WechatArticle article = Const.WACT_MAP.get(guid);
    String status;
    if (article.getDone()) {
        status = "文章同步完成";
    } else {
        status = "文章同步未完成";
    }
    logger.info(status);
    if (article.getError()) {
        logger.info("文章同步过程中出现问题");
    }
    logger.info("---以下为文章抓取内容---");
    logger.info(article.getResult());
}
use of com.bc.pmpheep.general.runnable.WechatArticle in project pmph by BCSquad.
the class WechatArticleService method runCrawler.
/**
 * Validates a WeChat official-account article link and hands it to the task executor
 * for asynchronous crawling.
 *
 * @param url article link; must be at least 25 characters long and start with
 *            {@code http://mp.weixin.qq.com} or {@code https://mp.weixin.qq.com}
 *            immediately followed by a path, query or fragment
 * @return the generated identifier given to the {@code WechatArticle} passed to the crawler task
 * @throws CheckedServiceException if the link is empty, too short, or does not point at
 *                                 the mp.weixin.qq.com host
 */
public String runCrawler(String url) throws CheckedServiceException {
    if (StringUtil.isEmpty(url)) {
        throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.NULL_PARAM, "给定链接不能为空");
    }
    String httpPrefix = "http://mp.weixin.qq.com";
    String httpsPrefix = "https://mp.weixin.qq.com";
    int hostEnd = -1;
    if (url.startsWith(httpsPrefix)) {
        hostEnd = httpsPrefix.length();
    } else if (url.startsWith(httpPrefix)) {
        hostEnd = httpPrefix.length();
    }
    // A bare prefix match would also accept hosts such as "mp.weixin.qq.com.evil.example"
    // or user-info tricks like "mp.weixin.qq.com@evil.example", so the character right
    // after the host must terminate the authority part ('/', '?' or '#').
    // The length check comes first, which also guarantees charAt(hostEnd) is in range.
    boolean valid = url.length() >= 25 && hostEnd > 0 && "/?#".indexOf(url.charAt(hostEnd)) >= 0;
    if (!valid) {
        throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.ILLEGAL_PARAM, "同步失败,请检查链接地址是否正确,或者其他原因");
    }
    String guid = String.valueOf(System.currentTimeMillis()).concat(String.valueOf(RandomUtil.getRandomNum()));
    taskExecutor.execute(new WechatArticleCrawlerTask(new WechatArticle(guid, url)));
    return guid;
}
use of com.bc.pmpheep.general.runnable.WechatArticle in project pmph by BCSquad.
the class WechatArticleService method synchroCmsContent.
/**
 * Turns a crawled WeChat article held in {@code Const.WACT_MAP} into a CMS content record.
 * <p>
 * The title and body are cut out of the crawled HTML, the images referenced by the body are
 * downloaded and their sources rewritten, the body is stored through {@code contentService}
 * and the resulting {@code CmsContent} is saved through {@code cmsContentService}. The map
 * entry for {@code guid} is removed once processing has finished.
 *
 * @param guid    identifier of the crawled article; must not be empty and must not contain
 *                path separators or ".."
 * @param request current request, used to resolve the session user
 * @return the saved {@code CmsContent}, or an empty {@code CmsContent} when no article is
 *         registered under {@code guid}
 * @throws IOException             declared by the original signature
 * @throws CheckedServiceException if the session user is missing, {@code guid} is invalid,
 *                                 the crawled HTML lacks the expected title/content markers,
 *                                 or the content cannot be stored
 */
public CmsContent synchroCmsContent(String guid, HttpServletRequest request) throws IOException {
    String sessionId = CookiesUtil.getSessionId(request);
    PmphUser sessionPmphUser = SessionUtil.getPmphUserBySessionId(sessionId);
    if (null == sessionPmphUser) {
        throw new CheckedServiceException(CheckedExceptionBusiness.MATERIAL, CheckedExceptionResult.NULL_PARAM, "请求用户不存在");
    }
    CmsContent cmsContent = new CmsContent();
    if (StringUtil.isEmpty(guid)) {
        throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.NULL_PARAM, "文章唯一标识不能为空");
    }
    // guid is appended to a directory path that is deleted below, so refuse anything that
    // could escape the working directory.
    if (guid.contains("/") || guid.contains("\\") || guid.contains("..")) {
        throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.ILLEGAL_PARAM, "文章唯一标识不合法");
    }
    // Delete the folder named after the guid (and everything below it) in the working directory.
    String dir = new File("").getAbsolutePath() + "/" + guid;
    FileUtil.deleteDirectory(dir);
    if (Const.WACT_MAP.containsKey(guid)) {
        WechatArticle wechatArticle = Const.WACT_MAP.get(guid);
        String html = wechatArticle.getResult();
        if (null == html) {
            throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.ILLEGAL_PARAM, "同步失败,请检查链接地址是否正确,或者其他原因");
        }
        // Extract the title; the end marker is searched only after the start marker.
        String titleStart = "<h2 class=\"rich_media_title\" id=\"activity-name\">";
        String titleEnd = "</h2>";
        int titleMarker = html.indexOf(titleStart);
        int titleS = titleMarker + titleStart.length();
        int titleE = titleMarker < 0 ? -1 : html.indexOf(titleEnd, titleS);
        if (titleE < 0) {
            throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.ILLEGAL_PARAM, "同步失败,请检查链接地址是否正确,或者其他原因");
        }
        String title = html.substring(titleS, titleE);
        // Extract the body: everything between the content marker and the last closing div.
        String contentStart = "<div class=\"rich_media_content \" id=\"js_content\">";
        String contentEnd = "</div>";
        int contentMarker = html.indexOf(contentStart);
        int contentS = contentMarker + contentStart.length();
        int contentE = html.lastIndexOf(contentEnd);
        if (contentMarker < 0 || contentE < contentS) {
            throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.ILLEGAL_PARAM, "同步失败,请检查链接地址是否正确,或者其他原因");
        }
        String content = html.substring(contentS, contentE);
        // Replace every "data-src" occurrence with "src".
        String contents = content.replace("data-src", "src");
        // Image tags found in the body.
        List<String> imgUrl = download.getImageUrl(contents);
        // src addresses of those images.
        List<String> imgSrc = download.getImageSrc(imgUrl);
        // Download the images.
        List<String> mongoImgs = download.download(imgSrc);
        // Guard against the two lists differing in length.
        int imgCount = Math.min(imgSrc.size(), mongoImgs.size());
        for (int i = 0; i < imgCount; i++) {
            if (StringUtil.notEmpty(mongoImgs.get(i))) {
                // Point the image at its stored copy.
                String imgsId = RouteUtil.MONGODB_FILE + mongoImgs.get(i);
                contents = contents.replace(imgSrc.get(i), imgsId);
            }
        }
        if (StringUtil.isEmpty(contents)) {
            throw new CheckedServiceException(CheckedExceptionBusiness.CMS, CheckedExceptionResult.NULL_PARAM, "内容参数为空");
        }
        // Store the body through the content service.
        Content contentObj = contentService.add(new Content(contents));
        if (StringUtil.isEmpty(contentObj.getId())) {
            throw new CheckedServiceException(CheckedExceptionBusiness.CMS, CheckedExceptionResult.PO_ADD_FAILED, "Content对象内容保存失败");
        }
        // Parent id (0 means content).
        cmsContent.setParentId(0L);
        // Root node path.
        cmsContent.setPath("0");
        // Id of the stored body.
        cmsContent.setMid(contentObj.getId());
        // Content category (1 = essay/article).
        cmsContent.setCategoryId(Const.CMS_CATEGORY_ID_1);
        cmsContent.setTitle(title.trim());
        // Author type.
        cmsContent.setAuthorType((short) 0);
        // Author id.
        cmsContent.setAuthorId(sessionPmphUser.getId());
        cmsContent = cmsContentService.addCmsContent(cmsContent);
    }
    // Remove the processed entry so the map does not grow without bound. The key must be
    // the guid variable; the original removed the literal string "guid" and never evicted.
    Const.WACT_MAP.remove(guid);
    return cmsContent;
}
use of com.bc.pmpheep.general.runnable.WechatArticle in project pmph by BCSquad.
the class WeChatArticleController method get.
/**
 * Looks up a crawled "Renwei Health" WeChat official-account article and writes its
 * result straight to the HTTP response.
 *
 * @author mr
 * @param guid     identifier of the article to look up
 * @param response response whose writer receives the article result
 * @throws CheckedServiceException if writing to the response fails
 */
@ResponseBody
@LogDetail(businessType = BUSSINESS_TYPE, logRemark = "查询人卫健康微信公众号文章")
@RequestMapping(value = "/article/get", method = RequestMethod.POST)
public void get(@RequestParam("guid") String guid, HttpServletResponse response) {
    WechatArticle wechatArticle = wechatArticleService.get(guid);
    PrintWriter writer;
    try {
        writer = response.getWriter();
        writer.write(wechatArticle.getResult());
        writer.flush();
        writer.close();
    } catch (IOException e) {
        // The original built this exception but never threw it, silently hiding the failure.
        throw new CheckedServiceException(CheckedExceptionBusiness.WECHAT_ARTICLE, CheckedExceptionResult.ILLEGAL_PARAM, "非法的请求参数");
    }
}
use of com.bc.pmpheep.general.runnable.WechatArticle in project pmph by BCSquad.
the class PersonalCenterController method getView.
/**
 * Builds the "wechat" view and, when an article is registered under the given id in the
 * shared article map, attaches that article to the view model.
 *
 * @param guid article identifier taken from the request path
 * @return the "wechat" view, with the article attached if one was found
 */
@RequestMapping(value = "/cms/wechat/v/{guid}", method = RequestMethod.GET)
public ModelAndView getView(@PathVariable("guid") String guid) {
    ModelAndView modelAndView = new ModelAndView("wechat");
    // Nothing registered for this id: return the view without a model object.
    if (!Const.WACT_MAP.containsKey(guid)) {
        return modelAndView;
    }
    modelAndView.addObject(Const.WACT_MAP.get(guid));
    return modelAndView;
}
Aggregations