use of com.rometools.rome.io.SyndFeedInput in project nutch by apache.
the class FeedParser method getParse.
/**
 * Parses the given feed and extracts and parses all linked items within the
 * feed, using the underlying ROME feed parsing library.
 *
 * <p>
 * The feed's own link is normalized and filtered before it is handed to the
 * per-entry processing; if either step fails, the link is treated as absent.
 * The feed itself is recorded in the result under the content URL, with the
 * tag-stripped description as text, the tag-stripped title as title, and no
 * outlinks.
 *
 * @param content
 *          A {@link Content} object representing the feed that is being
 *          parsed by this {@link Parser}.
 *
 * @return A {@link ParseResult} containing all {@link Parse}d feeds that were
 *         present in the feed file that this {@link Parser} dealt with. If
 *         the feed cannot be built, the empty parse result of a
 *         {@link ParseStatus} created from the failure is returned instead.
 */
@Override
public ParseResult getParse(Content content) {
  SyndFeed feed = null;
  ParseResult parseResult = new ParseResult(content.getUrl());

  // Guess the character encoding up front so the XML input source can be told about it.
  EncodingDetector detector = new EncodingDetector(conf);
  detector.autoDetectClues(content, true);
  String encoding = detector.guessEncoding(content, defaultEncoding);
  try {
    InputSource input = new InputSource(new ByteArrayInputStream(content.getContent()));
    input.setEncoding(encoding);
    SyndFeedInput feedInput = new SyndFeedInput();
    feed = feedInput.build(input);
  } catch (Exception e) {
    // return empty parse
    LOG.warn("Parse failed: url: " + content.getUrl() + ", exception: " + StringUtils.stringifyException(e));
    return new ParseStatus(e).getEmptyParseResult(content.getUrl(), getConf());
  }

  String feedLink = feed.getLink();
  try {
    feedLink = normalizers.normalize(feedLink, URLNormalizers.SCOPE_OUTLINK);
    if (feedLink != null) {
      feedLink = filters.filter(feedLink);
    }
  } catch (Exception e) {
    // Best effort: a feed link that cannot be normalized/filtered is dropped,
    // but leave a trace so the reason is not lost entirely.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Discarding feed link: url: " + content.getUrl() + ", exception: " + e);
    }
    feedLink = null;
  }

  // getEntries() is already typed as a list of SyndEntry, so no cast is needed.
  for (SyndEntry entry : feed.getEntries()) {
    addToMap(parseResult, feed, feedLink, entry, content);
  }

  String feedDesc = stripTags(feed.getDescriptionEx());
  String feedTitle = stripTags(feed.getTitleEx());
  parseResult.put(content.getUrl(), new ParseText(feedDesc), new ParseData(new ParseStatus(ParseStatus.SUCCESS), feedTitle, new Outlink[0], content.getMetadata()));
  return parseResult;
}
use of com.rometools.rome.io.SyndFeedInput in project ddf by codice.
the class OpenSearchSource method processResponse.
/**
 * Builds a source response from an RSS/Atom feed read from the given stream.
 *
 * <p>Every feed entry is converted with {@code createResponseFromEntry} and added to the result
 * list. The hit count defaults to the number of entries and is overridden by a
 * {@code totalResults} foreign-markup element when that element holds a parsable number. If the
 * feed cannot be parsed, the failure is logged at debug level and a response with no results and
 * zero hits is returned.
 *
 * @param is stream containing the feed; it is decoded as UTF-8
 * @param queryRequest the request the returned response is created for
 * @return the response holding the converted results and the hit count
 * @throws ddf.catalog.source.UnsupportedQueryException not thrown directly in this method;
 *     presumably propagated from {@code createResponseFromEntry} (TODO confirm)
 */
private SourceResponseImpl processResponse(InputStream is, QueryRequest queryRequest) throws UnsupportedQueryException {
  List<Result> resultQueue = new ArrayList<>();
  SyndFeedInput syndFeedInput = new SyndFeedInput();
  SyndFeed syndFeed = null;
  try {
    syndFeed = syndFeedInput.build(new InputStreamReader(is, StandardCharsets.UTF_8));
  } catch (FeedException e) {
    LOGGER.debug("Unable to read RSS/Atom feed.", e);
  }

  long totalResults = 0;
  if (syndFeed != null) {
    List<SyndEntry> entries = syndFeed.getEntries();
    for (SyndEntry entry : entries) {
      resultQueue.addAll(createResponseFromEntry(entry));
    }
    // Fallback hit count; replaced below if the feed advertises its own total.
    totalResults = entries.size();

    List<Element> foreignMarkup = syndFeed.getForeignMarkup();
    for (Element element : foreignMarkup) {
      if ("totalResults".equals(element.getName())) {
        try {
          String rawTotal = element.getContent(0).getValue();
          // Trim so whitespace around the number (e.g. from pretty-printed XML) is tolerated.
          // A null value still reaches parseLong, which rejects it with NumberFormatException.
          totalResults = Long.parseLong(rawTotal == null ? null : rawTotal.trim());
        } catch (NumberFormatException | IndexOutOfBoundsException e) {
          // totalResults is already initialized to the correct value, so don't change it here.
          LOGGER.debug("Received invalid number of results.", e);
        }
      }
    }
  }

  SourceResponseImpl response = new SourceResponseImpl(queryRequest, resultQueue);
  response.setHits(totalResults);
  return response;
}
use of com.rometools.rome.io.SyndFeedInput in project tika by apache.
the class FeedParser method parse.
/**
 * Reads a feed from the stream with ROME and reports it to the handler as XHTML.
 *
 * <p>The tag-stripped feed title and description are stored in the metadata and written as an
 * {@code h1} and a {@code p} element. Every entry that has a link becomes an {@code li} holding
 * an anchor with the tag-stripped entry title and, when the entry has a description, the
 * tag-stripped description on a new line. Entries without a link are skipped.
 *
 * @param stream source of the feed; wrapped in a {@link CloseShieldInputStream} before parsing
 * @param handler receiver of the generated XHTML events
 * @param metadata receives the feed title and description
 * @param context parse context; not used by this method
 * @throws IOException declared by the signature
 * @throws SAXException declared by the signature
 * @throws TikaException if ROME reports a {@link FeedException} while building the feed
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
  // set the encoding?
  try {
    InputSource source = new InputSource(new CloseShieldInputStream(stream));
    SyndFeed syndFeed = new SyndFeedInput().build(source);

    String feedTitle = stripTags(syndFeed.getTitleEx());
    String feedDescription = stripTags(syndFeed.getDescriptionEx());
    metadata.set(TikaCoreProperties.TITLE, feedTitle);
    metadata.set(TikaCoreProperties.DESCRIPTION, feedDescription);
    // store the other fields in the metadata

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    xhtml.element("h1", feedTitle);
    xhtml.element("p", feedDescription);

    xhtml.startElement("ul");
    for (SyndEntry item : syndFeed.getEntries()) {
      String href = item.getLink();
      if (href == null) {
        // Nothing to point the anchor at, so the entry is left out of the list.
        continue;
      }
      xhtml.startElement("li");
      xhtml.startElement("a", "href", href);
      xhtml.characters(stripTags(item.getTitleEx()));
      xhtml.endElement("a");

      SyndContent summary = item.getDescription();
      if (summary != null) {
        xhtml.newline();
        xhtml.characters(stripTags(summary));
      }
      xhtml.endElement("li");
    }
    xhtml.endElement("ul");

    xhtml.endDocument();
  } catch (FeedException e) {
    throw new TikaException("RSS parse error", e);
  }
}
use of com.rometools.rome.io.SyndFeedInput in project OpenOLAT by OpenOLAT.
the class RomeFeedFetcher method validateFeedUrl.
/**
 * Checks whether the given URL can be fetched and parsed as a feed.
 *
 * <p>URLs starting with {@code feed} or {@code itpc} have their first four characters replaced
 * by {@code http} before fetching. If such a rewritten {@code http://} URL cannot be parsed as a
 * feed, validation is retried once with {@code https://}.
 *
 * @param url the feed URL to validate; surrounding whitespace is ignored
 * @param enclosuresExpected if {@code true}, the first entry of a non-empty feed must carry at
 *     least one enclosure
 * @return a {@link ValidatedURL} carrying the (possibly rewritten) URL and one of the states
 *     {@code VALID}, {@code NO_ENCLOSURE}, {@code NOT_FOUND} (feed could not be parsed or the
 *     resource was not found) or {@code MALFORMED} (null URL or any other failure)
 */
@Override
public ValidatedURL validateFeedUrl(String url, boolean enclosuresExpected) {
  if (url == null) {
    // Explicit guard instead of relying on a caught NullPointerException; same outcome.
    return new ValidatedURL(url, ValidatedURL.State.MALFORMED);
  }

  SyndFeedInput input = new SyndFeedInput();
  boolean modifiedProtocol = false;
  try {
    url = url.trim();
    if (url.startsWith("feed") || url.startsWith("itpc")) {
      // accept feed(s) urls like generated in safari browser
      url = "http" + url.substring(4);
      modifiedProtocol = true;
    }
    URL realUrl = new URL(url);

    SyndFeed feed;
    // Close the reader (and the connection stream behind it) once the feed has been built.
    try (XmlReader reader = new XmlReader(realUrl)) {
      feed = input.build(reader);
    }

    if (enclosuresExpected
        && !feed.getEntries().isEmpty()
        && feed.getEntries().get(0).getEnclosures().isEmpty()) {
      return new ValidatedURL(url, ValidatedURL.State.NO_ENCLOSURE);
    }
    // The feed was read successfully
    return new ValidatedURL(url, ValidatedURL.State.VALID);
  } catch (ParsingFeedException e) {
    // fallback for SWITCHcast itpc -> http -> https
    // Only when the rewritten URL is plain http: a "feeds://" URL is already https after the
    // rewrite, and prefixing again would produce an invalid "httpss://" scheme.
    if (modifiedProtocol && url.startsWith("http://")) {
      url = "https" + url.substring(4);
      return validateFeedUrl(url, enclosuresExpected);
    }
    logFeedValidationFailure(url, e);
    return new ValidatedURL(url, ValidatedURL.State.NOT_FOUND);
  } catch (FileNotFoundException e) {
    logFeedValidationFailure(url, e);
    return new ValidatedURL(url, ValidatedURL.State.NOT_FOUND);
  } catch (Exception e) {
    logFeedValidationFailure(url, e);
  }
  return new ValidatedURL(url, ValidatedURL.State.MALFORMED);
}

/** Writes the debug message shared by all failure paths of {@code validateFeedUrl}. */
private void logFeedValidationFailure(String url, Exception e) {
  String message = String.format("Validation of the feed url %s failed. %s: %s ", url, e.getClass(), e.getMessage());
  log.debug(message);
}
use of com.rometools.rome.io.SyndFeedInput in project mycore by MyCoRe-Org.
the class MCRRSSFeedImporter method retrieveFeed.
/**
 * Fetches the feed at {@code feedURL} and parses it with ROME.
 *
 * <p>The reader opened on the URL is closed before returning, whether or not parsing succeeds.
 *
 * @return the parsed feed
 * @throws IOException if the URL is malformed, or if opening, reading or closing the reader fails
 * @throws FeedException if ROME cannot build a feed from the content
 */
private SyndFeed retrieveFeed() throws IOException, FeedException {
  // try-with-resources: the original never closed the reader, leaking the underlying stream.
  try (XmlReader feedReader = new XmlReader(new URL(feedURL))) {
    SyndFeedInput input = new SyndFeedInput();
    return input.build(feedReader);
  }
}
Aggregations