Use of com.kohlschutter.boilerpipe.document.TextDocument in project Gargoyle by callakrsos.
From the class TF_IDF, method boilerpipeTest.
/**
 * Exploratory test that requests a fixed Twitter URL and runs the response
 * body through boilerpipe.
 *
 * <p>The response stream is parsed into a {@link TextDocument}; the text
 * produced by {@code KeepEverythingExtractor} is printed, and the text
 * produced by {@code ArticleSentencesExtractor} is returned from the handler,
 * then logged and printed by the test.
 *
 * <p>The test makes no assertions; it only reports what was extracted.
 *
 * @throws Exception if the URL is malformed or the request itself fails
 */
@Test
public void boilerpipeTest() throws Exception {
    URL url = new URL("https://twitter.com/intent/favorite?tweet_id=805627131061374976");
    // NOTE(review): the meaning of the trailing 'true' flag is defined by
    // RequestUtil.request and is not visible here — confirm before changing it.
    String str = RequestUtil.request(url, new ResponseHandler<String>() {
        @Override
        public String apply(InputStream is, Integer code) {
            try {
                InputSource inputSource = new InputSource(is);
                inputSource.setEncoding("UTF-8");
                final BoilerpipeSAXInput in = new BoilerpipeSAXInput(inputSource);
                final TextDocument doc = in.getTextDocument();
                System.out.println("HH");
                LOGGER.debug("HH");
                System.out.println(KeepEverythingExtractor.INSTANCE.getText(doc));
                return ArticleSentencesExtractor.INSTANCE.getText(doc);
            } catch (Exception e) {
                // Report through the logger (with the cause attached) rather than
                // printStackTrace(), so the failure shows up in the same place as
                // the rest of this test's output.
                LOGGER.error("boilerpipe extraction failed, code=" + code, e);
                // Extraction is best-effort here: a failure yields no text.
                return null;
            }
        }
    }, true);
    LOGGER.debug(str);
    System.out.println(str);
}
Use of com.kohlschutter.boilerpipe.document.TextDocument in project Gargoyle by callakrsos.
From the class ArticleExtractorComposite, method boilderPipe.
/**
 * Runs the given boilerpipe extractor over a piece of markup.
 *
 * <p>The content is parsed into a {@link TextDocument} and handed to an
 * extractor instance obtained from {@code ValueUtil.HTML.newInsntance}.
 * If any step throws, the stack trace is printed and the original
 * {@code content} is returned unchanged.
 *
 * @param algorism extractor class to obtain an instance of
 * @param content  markup to extract text from
 * @return the extracted text, or {@code content} itself when extraction fails
 */
private String boilderPipe(Class<? extends ExtractorBase> algorism, String content) {
    // Start from the input so that a failure anywhere below falls back to it.
    String extracted = content;
    try (StringReader reader = new StringReader(content)) {
        final InputSource source = new InputSource(reader);
        // NOTE(review): SAX documents the encoding as unused when a character
        // stream is supplied — confirm whether this call is still needed.
        source.setEncoding("UTF-8");
        final TextDocument document = new BoilerpipeSAXInput(source).getTextDocument();
        extracted = ValueUtil.HTML.newInsntance(algorism).getText(document);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return extracted;
}
Aggregations