Search in sources :

Example 26 with Page

use of com.pratilipi.data.type.Page in project pratilipi by Pratilipi.

the class UxModeFilter method doFilter.

/**
 * Resolves the per-request UX context (basic mode, website, crawler flag and referer host),
 * publishes it through this filter's thread-locals while the rest of the chain runs, and
 * answers with a redirect instead of continuing the chain when one of the rules below applies.
 *
 * <p>Redirect rules, in order (none of them is applied to Facebook scraper requests):
 * <ol>
 *   <li>On the all-language website, content pages are permanently redirected to the host of
 *       the website whose filter language matches the content's language.</li>
 *   <li>A request for a page's canonical URI is permanently redirected to its URI alias.</li>
 *   <li>Browsers classified as "basic" are temporarily redirected to the mobile host.</li>
 * </ol>
 *
 * @param req   the incoming request; cast to {@link HttpServletRequest} unless running as the Android app
 * @param resp  the outgoing response; cast to {@link HttpServletResponse} unless running as the Android app
 * @param chain the remaining filter chain
 * @throws IOException      propagated from the downstream chain
 * @throws ServletException propagated from the downstream chain
 */
@Override
public void doFilter(ServletRequest req, ServletResponse resp, FilterChain chain) throws IOException, ServletException {
    if (isAndroidApp) {
        threadLocalBasicMode.set(false);
        threadLocalWebsite.set(null);
        threadLocalRefererHost.set(null);
    } else {
        HttpServletRequest request = (HttpServletRequest) req;
        HttpServletResponse response = (HttpServletResponse) resp;
        String hostName = request.getServerName();
        String requestUri = request.getRequestURI();
        String userAgent = request.getHeader("user-agent");
        String scheme = request.isSecure() ? "https://" : "http://";
        // Defaults - for all test environments
        boolean basicMode = false;
        Website website = null;
        // Figuring out Mode and Languages from a pre-configured list
        for (Website web : Website.values()) {
            if (hostName.equals(web.getHostName())) {
                basicMode = false;
                website = web;
                break;
            } else if (hostName.equals(web.getMobileHostName())) {
                basicMode = true;
                website = web;
                break;
            }
        }
        // NOTE: DO NOT redirect Facebook Scraping requests
        boolean redirectAllowed = isWebApp && (userAgent == null || !userAgent.startsWith("facebookexternalhit/1.1"));
        if (redirectAllowed) {
            // The page is looked up once and shared by both redirect rules below.
            DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
            Page page = dataAccessor.getPage(requestUri);
            if (page != null && website == Website.ALL_LANGUAGE) {
                // Rule 1: send language-specific content to its language-specific host.
                // A page whose primary content cannot be loaded is simply not redirected.
                String destHostName = null;
                if (page.getType() == PageType.PRATILIPI) {
                    Pratilipi pratilipi = dataAccessor.getPratilipi(page.getPrimaryContentId());
                    for (Website web : Website.values()) {
                        if (pratilipi != null && web.getFilterLanguage() == pratilipi.getLanguage()) {
                            destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                            break;
                        }
                    }
                } else if (page.getType() == PageType.AUTHOR) {
                    Author author = dataAccessor.getAuthor(page.getPrimaryContentId());
                    for (Website web : Website.values()) {
                        if (author != null && web.getFilterLanguage() == author.getLanguage()) {
                            destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                            break;
                        }
                    }
                } else if (page.getType() == PageType.EVENT) {
                    Event event = dataAccessor.getEvent(page.getPrimaryContentId());
                    for (Website web : Website.values()) {
                        if (event != null && web.getFilterLanguage() == event.getLanguage()) {
                            destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                            break;
                        }
                    }
                } else if (page.getType() == PageType.BLOG_POST) {
                    BlogPost blogPost = dataAccessor.getBlogPost(page.getPrimaryContentId());
                    for (Website web : Website.values()) {
                        if (blogPost != null && web.getFilterLanguage() == blogPost.getLanguage()) {
                            destHostName = basicMode ? web.getMobileHostName() : web.getHostName();
                            break;
                        }
                    }
                }
                if (destHostName != null) {
                    // NOTE(review): the query string is not carried over on this redirect,
                    // unlike the basic-browser redirect further down - confirm that is intended.
                    response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
                    response.setHeader("Location", scheme + destHostName + requestUri);
                    return;
                }
            }
            // Rule 2: canonical URI -> URI alias.
            if (page != null && page.getUriAlias() != null && requestUri.equals(page.getUri())) {
                response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
                response.setHeader("Location", page.getUriAlias());
                return;
            }
        }
        // Figuring out Browser capability
        boolean basicBrowser = false;
        boolean isCrawler = false;
        if (isWebApp) {
            // Version-based checks for other browsers (Opera, IE 11, Edge, Chrome, Safari,
            // Firefox) are currently disabled, so those browsers are served the full site.
            if (userAgent == null || userAgent.trim().isEmpty()) {
                basicBrowser = true;
            } else if (userAgent.contains("UCBrowser")) {
                /*
                 * UCBrowser on Android 4.3
                 *   "Mozilla/5.0 (Linux; U; Android 4.3; en-US; GT-I9300 Build/JSS15J) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 UCBrowser/10.0.1.512 U3/0.8.0 Mobile Safari/533.1"
                 */
                // Extreme mode
                basicBrowser = true;
            } else if (userAgent.contains("Opera Mini")) {
                /*
                 * Opera Mini on Android 4.3
                 *   "Opera/9.80 (Android; Opera Mini/7.6.40077/35.5706; U; en) Presto/2.8.119 Version/11.10"
                 */
                // Extreme mode
                basicBrowser = true;
            } else if (userAgent.contains("Googlebot")) {
                // Googlebot/2.1; || Googlebot-News || Googlebot-Image/1.0 || Googlebot-Video/1.0
                isCrawler = true;
            } else if (userAgent.equals("Google (+https://developers.google.com/+/web/snippet/)")) {
                // Google+
                isCrawler = true;
            } else if (userAgent.contains("Bingbot")) {
                // Microsoft Bing
                isCrawler = true;
            } else if (userAgent.contains("Slurp")) {
                // Yahoo
                isCrawler = true;
            } else if (userAgent.contains("DuckDuckBot")) {
                // DuckDuckGo
                isCrawler = true;
            } else if (userAgent.contains("Baiduspider")) {
                // Baidu - China
                isCrawler = true;
            } else if (userAgent.contains("YandexBot")) {
                // Yandex - Russia
                isCrawler = true;
            } else if (userAgent.contains("Exabot")) {
                // ExaLead - France
                isCrawler = true;
            } else if (userAgent.equals("facebot") || userAgent.startsWith("facebookexternalhit/1.0") || userAgent.startsWith("facebookexternalhit/1.1")) {
                // Facebook Scraping requests
                isCrawler = true;
            } else if (userAgent.startsWith("WhatsApp")) {
                // Whatsapp
                isCrawler = true;
            } else if (userAgent.startsWith("ia_archiver")) {
                // Alexa Crawler
                isCrawler = true;
            }
        }
        // Rule 3: redirecting requests coming from basic browsers to BasicMode
        if (basicBrowser && !basicMode && website != null && website.getMobileHostName() != null) {
            response.setStatus(HttpServletResponse.SC_MOVED_TEMPORARILY);
            String location = scheme + website.getMobileHostName() + requestUri;
            String queryString = request.getQueryString();
            if (queryString != null && !queryString.isEmpty()) {
                location = location + "?" + queryString;
            }
            response.setHeader("Location", location);
            return;
        }
        // The referer is client-supplied: anything that is not a parsable http(s) URL
        // (Eg: referer = android-app) falls back to the request's own host.
        String refererHost = hostName;
        String referer = request.getHeader("referer");
        if (referer != null && referer.startsWith("http")) {
            try {
                refererHost = new URL(referer).getHost();
            } catch (java.net.MalformedURLException ignored) {
                // Malformed header value - keep the fallback instead of failing the request.
            }
        }
        threadLocalBasicMode.set(basicMode);
        threadLocalWebsite.set(website);
        threadLocalIsCrawler.set(isCrawler);
        threadLocalRefererHost.set(refererHost);
    }
    try {
        chain.doFilter(req, resp);
    } finally {
        // Always clear the per-request state, even when the chain throws, so that values
        // do not survive on a thread that is reused for another request.
        threadLocalBasicMode.remove();
        threadLocalWebsite.remove();
        threadLocalIsCrawler.remove();
        threadLocalRefererHost.remove();
    }
}
Also used : HttpServletRequest(javax.servlet.http.HttpServletRequest) DataAccessor(com.pratilipi.data.DataAccessor) HttpServletResponse(javax.servlet.http.HttpServletResponse) Website(com.pratilipi.common.type.Website) Author(com.pratilipi.data.type.Author) Event(com.pratilipi.data.type.Event) Page(com.pratilipi.data.type.Page) Pratilipi(com.pratilipi.data.type.Pratilipi) BlogPost(com.pratilipi.data.type.BlogPost) URL(java.net.URL)

Example 27 with Page

use of com.pratilipi.data.type.Page in project pratilipi by Pratilipi.

the class PageDataUtil method _getSitemapForTypePage.

/**
 * Builds the sitemap XML for one chunk of pages, restricted to content in the given language.
 *
 * <p>Pages in the range {@code [cursor, cursor + SITEMAP_PAGE_COUNT]} (as interpreted by
 * {@code DataAccessor.getPageList}) are grouped by type. Pratilipis and blog posts are listed
 * only when PUBLISHED, authors only when ACTIVE, and all three plus events only when their
 * language matches {@code language}. Blog pages are listed unconditionally. Each listed
 * pratilipi also gets an entry for its reader URL.
 *
 * <p>If no entry at all ends up in the chunk, a single entry for {@code "/"} is emitted so
 * that the document never contains an empty {@code <urlset>}.
 *
 * @param cursor   first page id of the chunk; must not be null (it is unboxed)
 * @param hostName host name passed through to every sitemap entry
 * @param language language the listed content must be written in
 * @return the complete sitemap document as a string
 */
private static String _getSitemapForTypePage(Long cursor, String hostName, Language language) {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    List<Page> pageList = dataAccessor.getPageList(cursor, cursor + SITEMAP_PAGE_COUNT);
    // Primary content id -> public URI (alias when available), one map per page type.
    Map<Long, String> pratilipiPageMap = new HashMap<>();
    Map<Long, String> authorPageMap = new HashMap<>();
    Map<Long, String> blogPageMap = new HashMap<>();
    Map<Long, String> blogPostPageMap = new HashMap<>();
    Map<Long, String> eventPageMap = new HashMap<>();
    for (Page page : pageList) {
        String pageUri = page.getUriAlias() != null ? page.getUriAlias() : page.getUri();
        switch(page.getType()) {
            case PRATILIPI:
                pratilipiPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case AUTHOR:
                authorPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case BLOG:
                blogPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case BLOG_POST:
                blogPostPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            case EVENT:
                eventPageMap.put(page.getPrimaryContentId(), pageUri);
                break;
            default:
                break;
        }
    }
    // Batch-load the entities needed for the language / state filters.
    Map<Long, Pratilipi> pratilipis = dataAccessor.getPratilipis(pratilipiPageMap.keySet());
    Map<Long, Author> authors = dataAccessor.getAuthors(authorPageMap.keySet());
    Map<Long, BlogPost> blogPosts = dataAccessor.getBlogPosts(blogPostPageMap.keySet());
    Map<Long, Event> events = dataAccessor.getEvents(eventPageMap.keySet());
    StringBuilder sitemap = new StringBuilder("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + LINE_SEPARATOR);
    sitemap.append("<urlset xmlns=\"" + SITEMAP_NAMESPACE + "\">" + LINE_SEPARATOR);
    // Remembered so that "no entry was written" can be detected after filtering.
    int headerLength = sitemap.length();
    for (Map.Entry<Long, String> entry : pratilipiPageMap.entrySet()) {
        Long pratilipiId = entry.getKey();
        Pratilipi pratilipi = pratilipis.get(pratilipiId);
        // A page whose entity could not be loaded is skipped rather than failing the sitemap.
        if (pratilipi == null || pratilipi.getLanguage() != language || pratilipi.getState() != PratilipiState.PUBLISHED)
            continue;
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), pratilipi.getLastUpdated(), "daily", "0.7"));
        sitemap.append(_getSitemapEntry(hostName, "/read?" + RequestParameter.CONTENT_ID.getName() + "=" + pratilipiId, pratilipi.getLastUpdated(), "daily", "0.7"));
    }
    for (Map.Entry<Long, String> entry : authorPageMap.entrySet()) {
        Author author = authors.get(entry.getKey());
        if (author == null || author.getLanguage() != language || author.getState() != AuthorState.ACTIVE)
            continue;
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), author.getLastUpdated(), "daily", "0.6"));
    }
    for (String blogUri : blogPageMap.values())
        sitemap.append(_getSitemapEntry(hostName, blogUri, null, "weekly", null));
    for (Map.Entry<Long, String> entry : blogPostPageMap.entrySet()) {
        BlogPost blogPost = blogPosts.get(entry.getKey());
        if (blogPost == null || blogPost.getLanguage() != language || blogPost.getState() != BlogPostState.PUBLISHED)
            continue;
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), blogPost.getLastUpdated(), "weekly", "0.6"));
    }
    for (Map.Entry<Long, String> entry : eventPageMap.entrySet()) {
        Event event = events.get(entry.getKey());
        if (event == null || event.getLanguage() != language)
            continue;
        sitemap.append(_getSitemapEntry(hostName, entry.getValue(), event.getLastUpdated(), "weekly", "0.6"));
    }
    // For the cases when the sitemap would be empty - no pages in the chunk, or every page
    // filtered out above - add an entry to avoid an error on the crawler.
    if (sitemap.length() == headerLength) {
        sitemap.append(_getSitemapEntry(hostName, "/", null, "hourly", "0.9"));
    }
    sitemap.append("</urlset>");
    return sitemap.toString();
}
Also used : HashMap(java.util.HashMap) DataAccessor(com.pratilipi.data.DataAccessor) Page(com.pratilipi.data.type.Page) BlogPost(com.pratilipi.data.type.BlogPost) Author(com.pratilipi.data.type.Author) Event(com.pratilipi.data.type.Event) Pratilipi(com.pratilipi.data.type.Pratilipi)

Example 28 with Page

use of com.pratilipi.data.type.Page in project pratilipi by Pratilipi.

the class PratilipiDataUtil method updateFacebookScrape.

/**
 * Asks Facebook to re-scrape the public page of every published pratilipi in the list.
 *
 * <p>Ids that do not resolve to a pratilipi, pratilipis that are not PUBLISHED, and
 * pratilipis without a page record are skipped so that one bad id does not abort the
 * rest of the batch.
 *
 * @param pratilipiIdList ids of the pratilipis to refresh
 * @throws UnexpectedServerException declared by this method; presumably raised by
 *         {@code FacebookApi.postScrapeRequest} - confirm against that class
 */
public static void updateFacebookScrape(List<Long> pratilipiIdList) throws UnexpectedServerException {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    for (Long pratilipiId : pratilipiIdList) {
        Pratilipi pratilipi = dataAccessor.getPratilipi(pratilipiId);
        if (pratilipi == null || pratilipi.getState() != PratilipiState.PUBLISHED)
            continue;
        Page page = dataAccessor.getPage(PageType.PRATILIPI, pratilipiId);
        if (page == null)
            continue;
        // Prefer the human-readable alias; fall back to the canonical URI.
        String uri = page.getUriAlias() == null ? page.getUri() : page.getUriAlias();
        FacebookApi.postScrapeRequest("http://" + pratilipi.getLanguage().getHostName() + uri);
    }
}
Also used : DataAccessor(com.pratilipi.data.DataAccessor) Page(com.pratilipi.data.type.Page) UserPratilipi(com.pratilipi.data.type.UserPratilipi) Pratilipi(com.pratilipi.data.type.Pratilipi)

Example 29 with Page

use of com.pratilipi.data.type.Page in project pratilipi by Pratilipi.

the class PratilipiDataUtil method createOrUpdatePratilipiReadPageUrl.

/**
 * Creates the READ page of a pratilipi if it does not exist yet, and keeps its URI alias in
 * sync with the alias of the pratilipi's own page ({@code <pratilipi alias>/read}, or no
 * alias when the pratilipi page has none).
 *
 * <p>NOTE(review): assumes the PRATILIPI page for {@code pratilipiId} exists; a missing page
 * still fails with a NullPointerException, as before.
 *
 * @param pratilipiId id of the pratilipi whose read page is maintained
 * @return {@code true} if the read page was created or updated, {@code false} if it already
 *         carried the expected alias and nothing was written
 */
public static boolean createOrUpdatePratilipiReadPageUrl(Long pratilipiId) {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    Page page = dataAccessor.getPage(PageType.PRATILIPI, pratilipiId);
    Page readPage = dataAccessor.getPage(PageType.READ, pratilipiId);
    String expectedAlias = page.getUriAlias() == null ? null : page.getUriAlias() + "/read";
    if (readPage == null) {
        readPage = dataAccessor.newPage();
        readPage.setType(PageType.READ);
        readPage.setUri(PageType.READ.getUrlPrefix() + pratilipiId);
        readPage.setPrimaryContentId(pratilipiId);
        readPage.setCreationDate(new Date());
    } else if (java.util.Objects.equals(expectedAlias, readPage.getUriAlias())) {
        // Already in sync. The previous check compared the pratilipi page's URI with its own
        // alias, so an up-to-date read page was rewritten on every call.
        return false;
    }
    readPage.setUriAlias(expectedAlias);
    dataAccessor.createOrUpdatePage(readPage);
    return true;
}
Also used : DataAccessor(com.pratilipi.data.DataAccessor) Page(com.pratilipi.data.type.Page) Date(java.util.Date)

Example 30 with Page

use of com.pratilipi.data.type.Page in project pratilipi by Pratilipi.

the class EventDataUtil method _updateEventPageUrl.

/**
 * Prepares the page record of an event with an up-to-date URI alias.
 *
 * <p>A missing page is instantiated (type, canonical URI, content id, creation date). The
 * alias is then regenerated from the event's English name, or its name when no English
 * name is set.
 *
 * @param event the event whose page is examined
 * @return the page to be persisted by the caller - either newly instantiated or carrying a
 *         changed alias - or {@code null} when the regenerated alias equals the current one
 */
private static Page _updateEventPageUrl(Event event) {
    DataAccessor dataAccessor = DataAccessorFactory.getDataAccessor();
    Page page = dataAccessor.getPage(PageType.EVENT, event.getId());
    final boolean created = (page == null);
    if (created) {
        page = dataAccessor.newPage();
        page.setType(PageType.EVENT);
        page.setUri(PageType.EVENT.getUrlPrefix() + event.getId());
        page.setPrimaryContentId(event.getId());
        page.setCreationDate(new Date());
    }
    String currentAlias = page.getUriAlias();
    String urlPrefix = PageType.EVENT.getUrlPrefix();
    String title = event.getNameEn() == null ? event.getName() : event.getNameEn();
    String newAlias = UriAliasUtil.generateUriAlias(currentAlias, urlPrefix, title);
    // A brand-new page without an alias must still be handed back for persisting.
    if (created && newAlias == null) {
        return page;
    }
    // Alias unchanged (including both absent): nothing to do.
    if (java.util.Objects.equals(newAlias, currentAlias)) {
        return null;
    }
    logger.log(Level.INFO, "Updating Event Page Url: '" + currentAlias + "' -> '" + newAlias + "'");
    page.setUriAlias(newAlias);
    return page;
}
Also used : DataAccessor(com.pratilipi.data.DataAccessor) Page(com.pratilipi.data.type.Page) Date(java.util.Date)

Aggregations

Page (com.pratilipi.data.type.Page)37 DataAccessor (com.pratilipi.data.DataAccessor)28 ArrayList (java.util.ArrayList)12 Pratilipi (com.pratilipi.data.type.Pratilipi)10 Author (com.pratilipi.data.type.Author)8 Date (java.util.Date)8 HashMap (java.util.HashMap)7 UserPratilipi (com.pratilipi.data.type.UserPratilipi)6 JsonObject (com.google.gson.JsonObject)5 InvalidArgumentException (com.pratilipi.common.exception.InvalidArgumentException)5 GenericResponse (com.pratilipi.api.shared.GenericResponse)4 UnexpectedServerException (com.pratilipi.common.exception.UnexpectedServerException)4 Event (com.pratilipi.data.type.Event)4 LinkedList (java.util.LinkedList)4 Gson (com.google.gson.Gson)3 Post (com.pratilipi.api.annotation.Post)3 DocAccessor (com.pratilipi.data.DocAccessor)3 AuthorData (com.pratilipi.data.client.AuthorData)3 BlogPost (com.pratilipi.data.type.BlogPost)3 PratilipiV2Api (com.pratilipi.api.impl.pratilipi.PratilipiV2Api)2