Example use of com.kyj.fx.voeditor.visual.util.ResponseHandler in the project Gargoyle by callakrsos.
Class TF_IDF, method findAllLinks.
/** Links that are excluded only when they match exactly. */
private static final String[] EXCLUDED_EXACT_LINKS = {
    "https://submit.naver.com/",
    "http://www.naver.com"
};

/** Links are excluded when they start with any of these prefixes. */
private static final String[] EXCLUDED_LINK_PREFIXES = {
    "https://nid.naver.com",
    "http://searchad.naver.com",
    "http://music.naver.com",
    "http://m.post.naver.com",
    "http://tvcast.naver.com",
    "http://shopping.naver.com",
    "https://help.naver",
    "http://www.navercorp.com",
    "http://book.naver.com",
    "http://www.cwpyo.com",
    "http://navercast.naver.com"
};

/** Links are excluded when they contain any of these fragments anywhere. */
private static final String[] EXCLUDED_LINK_FRAGMENTS = {
    "namu.wiki",
    "wikipedia.org"
};

/**
 * Fetches a Naver search-result page, collects the {@code href} values of its
 * anchor tags that start with "http", drops the links matched by the fixed
 * exclusion lists, and hands the remaining links to
 * {@link #getString(Collection)}.
 *
 * <p>This test performs live network requests.
 *
 * @throws Exception if the search URL is malformed or the request utility fails
 */
@Test
public void findAllLinks() throws Exception {
    URL url = new URL("https://search.naver.com/search.naver?where=nexearch&query=%ED%91%9C%EC%B0%BD%EC%9B%90&sm=top_hty&fbm=1&ie=utf8");
    ResponseHandler<Set<String>> responseHandler = new ResponseHandler<Set<String>>() {
        @Override
        public Set<String> apply(InputStream is, Integer code) {
            // Only parse successful responses, mirroring the status check used in getString.
            if (code == null || code != 200) {
                System.err.println("Unexpected HTTP status " + code + " for " + url);
                return Collections.emptySet();
            }
            try {
                Document page = Jsoup.parse(is, "UTF-8", "http");
                // Extract only the <a> tags.
                Elements anchors = page.getElementsByTag("a");
                return anchors.stream()
                        .filter(a -> a.hasAttr("href"))
                        .map(a -> a.attr("href").trim())
                        // "http" is also a prefix of "https", so a single check covers both schemes.
                        .filter(href -> href.startsWith("http"))
                        .filter(href -> !isExcludedLink(href))
                        .collect(Collectors.toSet());
            } catch (IOException e) {
                e.printStackTrace();
                return Collections.emptySet();
            }
        }
    };
    Set<String> links = RequestUtil.requestSSL(url, responseHandler);
    getString(links);
}

/**
 * Tells whether a link is on the fixed exclusion lists.
 *
 * @param link the trimmed {@code href} value to test
 * @return {@code true} if the link equals an excluded link, starts with an
 *         excluded prefix, or contains an excluded fragment
 */
private static boolean isExcludedLink(String link) {
    for (String exact : EXCLUDED_EXACT_LINKS) {
        if (exact.equals(link)) {
            return true;
        }
    }
    for (String prefix : EXCLUDED_LINK_PREFIXES) {
        if (link.startsWith(prefix)) {
            return true;
        }
    }
    for (String fragment : EXCLUDED_LINK_FRAGMENTS) {
        if (link.contains(fragment)) {
            return true;
        }
    }
    return false;
}
Example use of com.kyj.fx.voeditor.visual.util.ResponseHandler in the project Gargoyle by callakrsos.
Class TF_IDF, method getString.
/**
 * Downloads every link, extracts the article text from each page that was
 * fetched successfully, and prints the TF-IDF entries computed over the
 * extracted texts to standard output.
 *
 * <p>A link is skipped when it cannot be fetched, when the response status is
 * not 200, or when its content cannot be extracted.
 *
 * @param links URLs to download; links starting with "https" are requested
 *              through {@code RequestUtil.requestSSL}, all others through
 *              {@code RequestUtil.request}
 */
public void getString(Collection<String> links) {
    // NOTE(review): the downloads are blocking I/O inside a parallel stream; confirm this is intended.
    URLModel[] array = links.parallelStream()
            .map(link -> fetchModel(link))
            .filter(model -> !model.isEmpty())
            .map(model -> extractArticle(model))
            .filter(model -> !model.isEmpty())
            .toArray(URLModel[]::new);
    List<KeyValue> tf_IDF = ValueUtil.toTF_IDF(array);
    tf_IDF.forEach(v -> System.out.println(v.toString()));
}

/**
 * Downloads one link and wraps its body in a {@code URLModel}.
 *
 * @param link the URL to download
 * @return the model holding the link and its body, or {@code URLModel.empty()}
 *         when the status is not 200 or the request fails
 */
private static URLModel fetchModel(String link) {
    ResponseHandler<URLModel> responseHandler = new ResponseHandler<URLModel>() {
        @Override
        public URLModel apply(InputStream is, Integer code) {
            if (code != null && code == 200) {
                return new URLModel(link, ValueUtil.toString(is));
            }
            return URLModel.empty();
        }
    };
    try {
        if (link.startsWith("https")) {
            return RequestUtil.requestSSL(new URL(link), responseHandler);
        }
        return RequestUtil.request(new URL(link), responseHandler);
    } catch (Exception e) {
        // Best effort: one unreachable link must not abort the batch, but say which one failed.
        System.err.println("Skipping link that could not be fetched: " + link + " (" + e + ")");
        return URLModel.empty();
    }
}

/**
 * Replaces the raw page content of a model with its extracted article text.
 *
 * @param model a non-empty model holding raw page content
 * @return the same model with its content replaced, or {@code URLModel.empty()}
 *         when extraction fails
 */
private static URLModel extractArticle(URLModel model) {
    try {
        ExtractorBase extractor = ArticleExtractor.getInstance();
        // Built inside the try so that unusable content skips this link instead of aborting the batch.
        InputSource source = new InputSource(new StringReader(model.getContent()));
        // NOTE(review): per the InputSource docs the encoding is not used when a character stream is supplied.
        source.setEncoding("UTF-8");
        model.setContent(ValueUtil.HTML.getNewsContent(extractor, source));
        return model;
    } catch (Exception e) {
        e.printStackTrace();
        return URLModel.empty();
    }
}
Aggregations