Use of com.pratilipi.data.type.Page in the project pratilipi by Pratilipi.
In the class UxModeFilter, method doFilter.
/**
 * Resolves per-request UX settings and exposes them to the rest of the filter chain.
 *
 * <p>For Android-app deployments the thread-locals are set to fixed values. Otherwise the method:
 * <ol>
 *   <li>matches the request host against the {@code Website} host names to pick the website and
 *       the basic/standard mode;</li>
 *   <li>on the web app, 301-redirects content pages requested on {@code Website.ALL_LANGUAGE} to
 *       the website whose filter language matches the content, and 301-redirects a page requested
 *       by its canonical URI to its URI alias ("facebookexternalhit/1.1" requests are never
 *       redirected);</li>
 *   <li>classifies the user agent as a basic browser or a crawler;</li>
 *   <li>302-redirects basic browsers to the mobile host when the website has one;</li>
 *   <li>stores mode, website, crawler flag and referer host in thread-locals, invokes the chain,
 *       and clears the thread-locals afterwards, even when the chain throws.</li>
 * </ol>
 *
 * @param req   incoming request; cast to {@code HttpServletRequest} outside the Android app
 * @param resp  outgoing response; cast to {@code HttpServletResponse} outside the Android app
 * @param chain remaining filter chain, invoked unless a redirect was issued
 * @throws IOException      propagated from the filter chain
 * @throws ServletException propagated from the filter chain
 */
@Override
public void doFilter(ServletRequest req, ServletResponse resp, FilterChain chain) throws IOException, ServletException {
    if (isAndroidApp) {
        threadLocalBasicMode.set(false);
        threadLocalWebsite.set(null);
        threadLocalRefererHost.set(null);
    } else {
        HttpServletRequest request = (HttpServletRequest) req;
        HttpServletResponse response = (HttpServletResponse) resp;
        String hostName = request.getServerName();
        String requestUri = request.getRequestURI();
        String userAgent = request.getHeader("user-agent");
        String scheme = request.isSecure() ? "https://" : "http://";

        // Defaults - for all test environments
        boolean basicMode = false;
        Website website = null;

        // Figuring out Mode and Languages from a pre-configured list
        for (Website web : Website.values()) {
            if (hostName.equals(web.getHostName())) {
                basicMode = false;
                website = web;
                break;
            } else if (hostName.equals(web.getMobileHostName())) {
                basicMode = true;
                website = web;
                break;
            }
        }

        // NOTE: DO NOT redirect Facebook Scraping requests
        boolean redirectAllowed = isWebApp
                && (userAgent == null || userAgent.isEmpty() || !userAgent.startsWith("facebookexternalhit/1.1"));

        if (redirectAllowed) {
            DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
            // Looked up once and shared by both redirect rules below.
            Page page = dataAccessor.getPage(requestUri);

            // Rule 1: content requested on the all-language website moves to its language website.
            if (page != null && website == Website.ALL_LANGUAGE) {
                String destHostName = null;
                // In every branch a content entity that cannot be loaded simply means "no redirect"
                // instead of a NullPointerException.
                if (page.getType() == PageType.PRATILIPI) {
                    Pratilipi pratilipi = dataAccessor.getPratilipi(page.getPrimaryContentId());
                    if (pratilipi != null) {
                        for (Website web : Website.values()) {
                            if (web.getFilterLanguage() == pratilipi.getLanguage()) {
                                destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                                break;
                            }
                        }
                    }
                } else if (page.getType() == PageType.AUTHOR) {
                    Author author = dataAccessor.getAuthor(page.getPrimaryContentId());
                    if (author != null) {
                        for (Website web : Website.values()) {
                            if (web.getFilterLanguage() == author.getLanguage()) {
                                destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                                break;
                            }
                        }
                    }
                } else if (page.getType() == PageType.EVENT) {
                    Event event = dataAccessor.getEvent(page.getPrimaryContentId());
                    if (event != null) {
                        for (Website web : Website.values()) {
                            if (web.getFilterLanguage() == event.getLanguage()) {
                                destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                                break;
                            }
                        }
                    }
                } else if (page.getType() == PageType.BLOG_POST) {
                    BlogPost blogPost = dataAccessor.getBlogPost(page.getPrimaryContentId());
                    if (blogPost != null) {
                        for (Website web : Website.values()) {
                            if (web.getFilterLanguage() == blogPost.getLanguage()) {
                                destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                                break;
                            }
                        }
                    }
                }

                if (destHostName != null) {
                    response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
                    response.setHeader("Location", scheme + destHostName + requestUri);
                    return;
                }
            }

            // Rule 2: a page requested by its canonical URI moves to its alias.
            if (page != null && page.getUriAlias() != null && requestUri.equals(page.getUri())) {
                response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
                response.setHeader("Location", page.getUriAlias());
                return;
            }
        }

        // Figuring out Browser capability.
        // Version-based checks for other browsers (Opera Classic, IE 11, Opera, Edge, Chrome,
        // Safari, Firefox) used to live here but were disabled; only a missing user agent and
        // the two proxy browsers below are forced into basic mode.
        boolean basicBrowser = false;
        boolean isCrawler = false;
        if (isWebApp) {
            if (userAgent == null || userAgent.trim().isEmpty()) {
                basicBrowser = true;
            } else if (userAgent.contains("UCBrowser")) {
                /*
                 * UCBrowser on Android 4.3 (Extreme mode)
                 * "Mozilla/5.0 (Linux; U; Android 4.3; en-US; GT-I9300 Build/JSS15J) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 UCBrowser/10.0.1.512 U3/0.8.0 Mobile Safari/533.1"
                 */
                basicBrowser = true;
            } else if (userAgent.contains("Opera Mini")) {
                /*
                 * Opera Mini on Android 4.3 (Extreme mode)
                 * "Opera/9.80 (Android; Opera Mini/7.6.40077/35.5706; U; en) Presto/2.8.119 Version/11.10"
                 */
                basicBrowser = true;
            } else {
                isCrawler = isCrawlerUserAgent(userAgent);
            }
        }

        // Redirecting requests coming from basic browsers to BasicMode
        if (basicBrowser && !basicMode && website != null && website.getMobileHostName() != null) {
            String location = scheme + website.getMobileHostName() + requestUri;
            String queryString = request.getQueryString();
            if (queryString != null && !queryString.isEmpty()) {
                location = location + "?" + queryString;
            }
            response.setStatus(HttpServletResponse.SC_MOVED_TEMPORARILY);
            response.setHeader("Location", location);
            return;
        }

        // The Referer header is client-controlled. Non-http values (e.g. "android-app") and
        // values that cannot be parsed as a URL both fall back to the request host.
        String refererHost = hostName;
        String referer = request.getHeader("referer");
        if (referer != null && referer.startsWith("http")) {
            try {
                refererHost = new URL(referer).getHost();
            } catch (java.net.MalformedURLException ignored) {
                // e.g. "httpfoo://x" - keep the fallback rather than failing the request.
            }
        }

        threadLocalBasicMode.set(basicMode);
        threadLocalWebsite.set(website);
        threadLocalIsCrawler.set(isCrawler);
        threadLocalRefererHost.set(refererHost);
    }

    try {
        chain.doFilter(req, resp);
    } finally {
        // Always clear per-request state, even when the chain throws, so it cannot be observed
        // by a later request handled on the same thread.
        threadLocalBasicMode.remove();
        threadLocalWebsite.remove();
        threadLocalIsCrawler.remove();
        threadLocalRefererHost.remove();
    }
}

/** Returns whether the (non-empty) user agent matches one of the known crawler signatures. */
private static boolean isCrawlerUserAgent(String userAgent) {
    return userAgent.contains("Googlebot") // Googlebot/2.1; || Googlebot-News || Googlebot-Image/1.0 || Googlebot-Video/1.0
            || userAgent.equals("Google (+https://developers.google.com/+/web/snippet/)") // Google+
            || userAgent.contains("Bingbot") // Microsoft Bing
            || userAgent.contains("Slurp") // Yahoo
            || userAgent.contains("DuckDuckBot") // DuckDuckGo
            || userAgent.contains("Baiduspider") // Baidu - China
            || userAgent.contains("YandexBot") // Yandex - Russia
            || userAgent.contains("Exabot") // ExaLead - France
            || userAgent.equals("facebot") // Facebook Scraping requests
            || userAgent.startsWith("facebookexternalhit/1.0")
            || userAgent.startsWith("facebookexternalhit/1.1")
            || userAgent.startsWith("WhatsApp") // Whatsapp
            || userAgent.startsWith("ia_archiver"); // Alexa Crawler
}
Use of com.pratilipi.data.type.Page in the project pratilipi by Pratilipi.
In the class PageDataUtil, method _getSitemapForTypePage.
/**
 * Builds the sitemap XML document for one slice of pages.
 *
 * <p>Pages are fetched with {@code getPageList(cursor, cursor + SITEMAP_PAGE_COUNT)} and grouped
 * by type. Pratilipi, author, blog-post and event pages are emitted only when the underlying
 * entity has the requested language (and, where checked, is PUBLISHED / ACTIVE); blog pages are
 * emitted unconditionally. When no entry at all was emitted, a single entry for "/" is written so
 * the {@code <urlset>} is never empty.
 *
 * @param cursor   first argument passed to {@code getPageList}; must not be null
 * @param hostName host name passed through to every sitemap entry
 * @param language language an entity must have for its page to be listed
 * @return the complete sitemap XML
 */
private static String _getSitemapForTypePage(Long cursor, String hostName, Language language) {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    List<Page> pageList = dataAccessor.getPageList(cursor, cursor + SITEMAP_PAGE_COUNT);

    // content id -> uri (alias preferred), one map per page type
    Map<Long, String> pratilipiPageMap = new HashMap<>();
    Map<Long, String> authorPageMap = new HashMap<>();
    Map<Long, String> blogPageMap = new HashMap<>();
    Map<Long, String> blogPostPageMap = new HashMap<>();
    Map<Long, String> eventPageMap = new HashMap<>();
    for (Page page : pageList) {
        String pageUri = page.getUriAlias() != null ? page.getUriAlias() : page.getUri();
        switch (page.getType()) {
            case PRATILIPI:
                pratilipiPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case AUTHOR:
                authorPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case BLOG:
                blogPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case BLOG_POST:
                blogPostPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case EVENT:
                eventPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            default:
                break;
        }
    }

    Map<Long, Pratilipi> pratilipis = dataAccessor.getPratilipis(pratilipiPageMap.keySet());
    Map<Long, Author> authors = dataAccessor.getAuthors(authorPageMap.keySet());
    Map<Long, BlogPost> blogPosts = dataAccessor.getBlogPosts(blogPostPageMap.keySet());
    Map<Long, Event> events = dataAccessor.getEvents(eventPageMap.keySet());

    StringBuilder sitemap = new StringBuilder("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + LINE_SEPARATOR);
    sitemap.append("<urlset xmlns=\"" + SITEMAP_NAMESPACE + "\">" + LINE_SEPARATOR);
    // Length of the document before any entry; used below to detect "nothing emitted".
    int lengthWithoutEntries = sitemap.length();

    for (Map.Entry<Long, String> entry : pratilipiPageMap.entrySet()) {
        Long pratilipiId = entry.getKey();
        Pratilipi pratilipi = pratilipis.get(pratilipiId);
        // A page whose entity is missing from the bulk lookup is skipped.
        if (pratilipi == null
                || pratilipi.getLanguage() != language
                || pratilipi.getState() != PratilipiState.PUBLISHED) {
            continue;
        }
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), pratilipi.getLastUpdated(), "daily", "0.7"));
        sitemap.append(_getSitemapEntry(hostName, "/read?" + RequestParameter.CONTENT_ID.getName() + "=" + pratilipiId, pratilipi.getLastUpdated(), "daily", "0.7"));
    }

    for (Map.Entry<Long, String> entry : authorPageMap.entrySet()) {
        Author author = authors.get(entry.getKey());
        if (author == null
                || author.getLanguage() != language
                || author.getState() != AuthorState.ACTIVE) {
            continue;
        }
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), author.getLastUpdated(), "daily", "0.6"));
    }

    // Blog pages are listed regardless of language.
    for (String blogUri : blogPageMap.values()) {
        sitemap.append(_getSitemapEntry(hostName, blogUri, null, "weekly", null));
    }

    for (Map.Entry<Long, String> entry : blogPostPageMap.entrySet()) {
        BlogPost blogPost = blogPosts.get(entry.getKey());
        if (blogPost == null
                || blogPost.getLanguage() != language
                || blogPost.getState() != BlogPostState.PUBLISHED) {
            continue;
        }
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), blogPost.getLastUpdated(), "weekly", "0.6"));
    }

    for (Map.Entry<Long, String> entry : eventPageMap.entrySet()) {
        Event event = events.get(entry.getKey());
        if (event == null || event.getLanguage() != language) {
            continue;
        }
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), event.getLastUpdated(), "weekly", "0.6"));
    }

    // For the cases when the sitemap would be empty (no pages in this slice, or every page was
    // filtered out), add an entry to avoid an error on the crawler.
    if (sitemap.length() == lengthWithoutEntries) {
        sitemap.append(_getSitemapEntry(hostName, "/", null, "hourly", "0.9"));
    }

    sitemap.append("</urlset>");
    return sitemap.toString();
}
Use of com.pratilipi.data.type.Page in the project pratilipi by Pratilipi.
In the class PratilipiDataUtil, method updateFacebookScrape.
/**
 * Posts a Facebook scrape request for the public URL of every PUBLISHED pratilipi in the list.
 *
 * <p>The URL is {@code "http://" + <language host name> + <uri alias, or uri when there is no
 * alias>}. Ids whose pratilipi or PRATILIPI page cannot be loaded are skipped so that a single
 * stale id does not abort the remaining requests.
 *
 * @param pratilipiIdList ids of the pratilipis to re-scrape
 * @throws UnexpectedServerException declared by this method; presumably raised by
 *         {@code FacebookApi.postScrapeRequest} - confirm against that API
 */
public static void updateFacebookScrape(List<Long> pratilipiIdList) throws UnexpectedServerException {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    for (Long pratilipiId : pratilipiIdList) {
        Pratilipi pratilipi = dataAccessor.getPratilipi(pratilipiId);
        if (pratilipi == null || pratilipi.getState() != PratilipiState.PUBLISHED) {
            continue;
        }
        Page page = dataAccessor.getPage(PageType.PRATILIPI, pratilipiId);
        if (page == null) {
            // No page to build a URL from.
            continue;
        }
        String uri = page.getUriAlias() == null ? page.getUri() : page.getUriAlias();
        FacebookApi.postScrapeRequest("http://" + pratilipi.getLanguage().getHostName() + uri);
    }
}
Use of com.pratilipi.data.type.Page in the project pratilipi by Pratilipi.
In the class PratilipiDataUtil, method createOrUpdatePratilipiReadPageUrl.
/**
 * Creates the READ page of a pratilipi, or brings its URI alias in line with the pratilipi page.
 *
 * <p>The READ page's alias is derived from the PRATILIPI page: {@code <pratilipi alias> + "/read"},
 * or {@code null} when the pratilipi page has no alias. An existing READ page is written only when
 * its current alias differs from that derived value.
 *
 * @param pratilipiId id of the pratilipi; its PRATILIPI page is dereferenced without a null check
 * @return {@code true} when the READ page was created or updated, {@code false} when it already
 *         had the expected alias
 */
public static boolean createOrUpdatePratilipiReadPageUrl(Long pratilipiId) {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    Page page = dataAccessor.getPage(PageType.PRATILIPI, pratilipiId);
    Page readPage = dataAccessor.getPage(PageType.READ, pratilipiId);

    String expectedAlias = page.getUriAlias() == null ? null : page.getUriAlias() + "/read";

    if (readPage == null) {
        readPage = dataAccessor.newPage();
        readPage.setType(PageType.READ);
        readPage.setUri(PageType.READ.getUrlPrefix() + pratilipiId);
        readPage.setPrimaryContentId(pratilipiId);
        readPage.setCreationDate(new Date());
    } else {
        // Compare the alias the READ page has with the alias it should have. The previous check
        // compared the pratilipi page's uri with its own alias, so it did not detect an
        // up-to-date READ page.
        String currentAlias = readPage.getUriAlias();
        boolean upToDate = expectedAlias == null
                ? currentAlias == null
                : expectedAlias.equals(currentAlias);
        if (upToDate) {
            return false;
        }
    }

    readPage.setUriAlias(expectedAlias);
    dataAccessor.createOrUpdatePage(readPage);
    return true;
}
Use of com.pratilipi.data.type.Page in the project pratilipi by Pratilipi.
In the class EventDataUtil, method _updateEventPageUrl.
/**
 * Prepares the EVENT page of an event with a freshly generated URI alias.
 *
 * <p>A missing page is created in memory first. The alias is generated from the page's current
 * alias, the EVENT URL prefix and the event's English name (or its name when there is no
 * English name). Nothing is persisted by this method.
 *
 * @param event event whose page is to be prepared
 * @return the page when it is new or its alias was changed; {@code null} when an existing page
 *         (or a new page with a non-null generated alias equal to its current one) needs no change
 */
private static Page _updateEventPageUrl(Event event) {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    Page page = dataAccessor.getPage(PageType.EVENT, event.getId());
    final boolean isNew = (page == null);
    if (isNew) {
        page = dataAccessor.newPage();
        page.setType(PageType.EVENT);
        page.setUri(PageType.EVENT.getUrlPrefix() + event.getId());
        page.setPrimaryContentId(event.getId());
        page.setCreationDate(new Date());
    }

    String eventTitle = event.getNameEn() != null ? event.getNameEn() : event.getName();
    String currentAlias = page.getUriAlias();
    String uriAlias = UriAliasUtil.generateUriAlias(currentAlias, PageType.EVENT.getUrlPrefix(), eventTitle);

    // A brand-new page without an alias is still returned to the caller.
    if (isNew && uriAlias == null) {
        return page;
    }

    // Null-safe equality between the generated alias and the current one.
    boolean aliasUnchanged = (uriAlias == null)
            ? currentAlias == null
            : uriAlias.equals(currentAlias);
    if (aliasUnchanged) {
        return null;
    }

    logger.log(Level.INFO, "Updating Event Page Url: '" + currentAlias + "' -> '" + uriAlias + "'");
    page.setUriAlias(uriAlias);
    return page;
}
Aggregations