Search in sources :

Example 51 with Page

use of org.asqatasun.entity.subject.Page in project Asqatasun by Asqatasun.

the class ContentDAOImpl method findNumberOfOrphanContentFromWebResource.

/**
 * Runs the "orphan content" query for the given web resource and returns
 * its single result as a {@code Long}.
 *
 * For a {@code Page} the query is restricted with WEB_RESOURCE_CONDITION;
 * for a {@code Site} it joins the parent of the joined web resource and
 * matches that parent against the given site. In both cases the HTTP
 * status parameter is bound to DEFAULT_HTTP_STATUS_VALUE.
 *
 * @param webResource the page or site to query for
 * @return the single result of the query, or 0 when the web resource is
 *         neither a Page nor a Site
 */
@Override
public Long findNumberOfOrphanContentFromWebResource(WebResource webResource) {
    // Only the scoping part of the query differs between a page and a site.
    final String scopeClause;
    if (webResource instanceof Page) {
        scopeClause = WEB_RESOURCE_CONDITION;
    } else if (webResource instanceof Site) {
        // presumably JOIN_WR declares the alias "w" used here -- TODO confirm
        scopeClause = " JOIN w.parent p" + " WHERE p=:webResource";
    } else {
        return 0L;
    }
    String jpql = SELECT_DISTINCT_SSP + SSPImpl.class.getName() + " s" + JOIN_WR + scopeClause + HTTP_STATUS_CONDITION;
    Query orphanQuery = entityManager.createQuery(jpql);
    orphanQuery.setParameter(WEB_RESOURCE_KEY, webResource);
    orphanQuery.setParameter(HTTP_STATUS_CODE_KEY, DEFAULT_HTTP_STATUS_VALUE);
    return (Long) orphanQuery.getSingleResult();
}
Also used : Site(org.asqatasun.entity.subject.Site) Query(javax.persistence.Query) Page(org.asqatasun.entity.subject.Page)

Example 52 with Page

use of org.asqatasun.entity.subject.Page in project Asqatasun by Asqatasun.

the class ContentDAOImpl method findOrphanRelatedContentList.

/**
 * Fetches one chunk of the distinct related contents attached to the given
 * web resource whose HTTP status code equals DEFAULT_HTTP_STATUS_VALUE.
 *
 * A {@code Page} is matched through WEB_RESOURCE_CONDITION; a {@code Site}
 * is matched through the id of the parent of the joined web resource.
 *
 * @param webResource the page or site whose related contents are requested
 * @param start index of the first result to return
 * @param chunkSize maximum number of results to return
 * @return the requested chunk, or an empty list when the web resource is
 *         neither a Page nor a Site
 */
@Override
public List<Content> findOrphanRelatedContentList(WebResource webResource, int start, int chunkSize) {
    boolean isPage = webResource instanceof Page;
    if (!isPage && !(webResource instanceof Site)) {
        return Collections.emptyList();
    }
    String selectAndJoins = "SELECT distinct rc FROM " + RelatedContentImpl.class.getName() + RELATED_CONTENT_KEY + JOIN_PARENT_CONTENT_SET + JOIN_WR;
    String statusFilter = " AND rc.httpStatusCode =:httpStatusCode";
    Query chunkQuery;
    if (isPage) {
        chunkQuery = entityManager.createQuery(selectAndJoins + WEB_RESOURCE_CONDITION + statusFilter);
        chunkQuery.setParameter(WEB_RESOURCE_KEY, webResource);
    } else {
        chunkQuery = entityManager.createQuery(selectAndJoins + " WHERE w.parent.id=:idWebResource" + statusFilter);
        chunkQuery.setParameter(ID_WEB_RESOURCE_KEY, webResource.getId());
    }
    chunkQuery.setParameter(HTTP_STATUS_CODE_KEY, DEFAULT_HTTP_STATUS_VALUE);
    chunkQuery.setFirstResult(start);
    chunkQuery.setMaxResults(chunkSize);
    List<Content> relatedContents = (List<Content>) chunkQuery.getResultList();
    if (!isPage) {
        // NOTE(review): only the site lookup flushes and closes the entity
        // manager; the page lookup does not -- confirm this is intentional.
        flushAndCloseEntityManager();
    }
    return relatedContents;
}
Also used : Site(org.asqatasun.entity.subject.Site) Query(javax.persistence.Query) Page(org.asqatasun.entity.subject.Page)

Example 53 with Page

use of org.asqatasun.entity.subject.Page in project Asqatasun by Asqatasun.

the class ScenarioLoaderImplTest method testRun.

/**
 * Test of run method, of class ScenarioLoaderImpl.
 *
 * For this test, we use the pages hosted at http://site.asqatasun.ovh/.
 * The scenario sequence is:
 *      - get the page "http://site.asqatasun.ovh/."
 *      - click on "This page won't be crawled due to the robots.txt restrictrion" link
 *      - get the page "http://site.asqatasun.ovh/page-access-forbidden-for-robots.html"
 *      - click on "Back" button
 *      - get the page "http://site.asqatasun.ovh/."
 *      - click on "This page will be crawled" link
 *      - get the page "http://site.asqatasun.ovh/page-1.html"
 *      - click on "This page won't be crawled due to the robots.txt restrictrion" link
 *      - get the page "http://site.asqatasun.ovh/page-access-forbidden-for-robots.html"
 *      - click on "Back" button
 *      - get the page "http://site.asqatasun.ovh/page-1.html"
 *      - click on "This page will be crawled" link
 *      - get the page "http://site.asqatasun.ovh/page-2.html"
 *      - click on "This page won't be crawled due to the robots.txt restrictrion" link
 *      - get the page "http://site.asqatasun.ovh/page-access-forbidden-for-robots.html"
 *
 * That is eight page retrievals. For each retrieved page, a webResource is
 * created and added to the parent WebResource, and an SSP is created that
 * handles the source code of the page.
 *
 * In this test, we don't mock the HarFileContentLoaderFactory and the
 * ProfileFactory.
 *
 * This test is a functional test.
 *
 * NOTE(review): the construction of the ScenarioLoaderImpl instance and the
 * call to run() are currently commented out, so none of the expectations
 * recorded below is exercised before the verify calls. Presumably this test
 * is disabled or expected to fail in its current state -- confirm.
 */
public void testRun() {
    System.out.println("run");
    Date date = new Date();
    // One date is expected per retrieved page (eight retrievals in the scenario).
    DateFactory mockDateFactory = createMock(DateFactory.class);
    expect(mockDateFactory.createDate()).andReturn(date).times(8);
    ContentDataService mockContentDataService = createMock(ContentDataService.class);
    WebResourceDataService mockWebResourceDataService = createMock(WebResourceDataService.class);
    // --- Page expectations: one createPage + one saveOrUpdate per retrieval,
    // --- recorded in scenario order (root, forbidden, root, page-1,
    // --- forbidden, page-1, page-2, forbidden).
    Page mockPage1 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(ROOT_PAGE_URL)).andReturn(mockPage1).once();
    //        expect(mockPage1.getURL())
    //                .andReturn(ROOT_PAGE_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage1)).andReturn(mockPage1).once();
    Page mockPage2 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(PAGE_ACCESS_FORBIDDEN_URL)).andReturn(mockPage2).once();
    //        expect(mockPage2.getURL())
    //                .andReturn(PAGE_ACCESS_FORBIDDEN_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage2)).andReturn(mockPage2).once();
    Page mockPage3 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(ROOT_PAGE_URL)).andReturn(mockPage3).once();
    //        expect(mockPage3.getURL())
    //                .andReturn(ROOT_PAGE_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage3)).andReturn(mockPage3).once();
    Page mockPage4 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(PAGE_1_URL)).andReturn(mockPage4).once();
    //        expect(mockPage4.getURL())
    //                .andReturn(PAGE_1_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage4)).andReturn(mockPage4).once();
    Page mockPage5 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(PAGE_ACCESS_FORBIDDEN_URL)).andReturn(mockPage5).once();
    //        expect(mockPage5.getURL())
    //                .andReturn(PAGE_ACCESS_FORBIDDEN_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage5)).andReturn(mockPage5).once();
    Page mockPage6 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(PAGE_1_URL)).andReturn(mockPage6).once();
    //        expect(mockPage6.getURL())
    //                .andReturn(PAGE_1_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage6)).andReturn(mockPage6).once();
    Page mockPage7 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(PAGE_2_URL)).andReturn(mockPage7).once();
    //        expect(mockPage7.getURL())
    //                .andReturn(PAGE_2_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage7)).andReturn(mockPage7).once();
    Page mockPage8 = createMock(Page.class);
    expect(mockWebResourceDataService.createPage(PAGE_ACCESS_FORBIDDEN_URL)).andReturn(mockPage8).once();
    //        expect(mockPage8.getURL())
    //                .andReturn(PAGE_ACCESS_FORBIDDEN_URL)
    //                .once();
    expect(mockWebResourceDataService.saveOrUpdate(mockPage8)).andReturn(mockPage8).once();
    // --- SSP expectations: for each retrieval, getSSP is expected with the
    // --- shared date, the page URL, the source taken from pageMap, a null
    // --- page and HTTP 200; the returned SSP is then expected to be queried
    // --- for its status code and URI, given a DOM and a page, and saved.
    SSP ssp1 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, ROOT_PAGE_URL, pageMap.get(ROOT_PAGE_URL), null, HttpStatus.SC_OK)).andReturn(ssp1).once();
    expect(ssp1.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp1.getURI()).andReturn(ROOT_PAGE_URL).once();
    ssp1.setDOM(readFile("htmlFiles/root-page_1.html", false));
    ssp1.setPage(mockPage1);
    expect(mockContentDataService.saveOrUpdate(ssp1)).andReturn(ssp1).once();
    SSP ssp2 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, PAGE_ACCESS_FORBIDDEN_URL, pageMap.get(PAGE_ACCESS_FORBIDDEN_URL), null, HttpStatus.SC_OK)).andReturn(ssp2).once();
    expect(ssp2.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp2.getURI()).andReturn(PAGE_ACCESS_FORBIDDEN_URL).once();
    ssp2.setDOM(readFile("htmlFiles/page-access-forbidden_1.html", false));
    ssp2.setPage(mockPage2);
    expect(mockContentDataService.saveOrUpdate(ssp2)).andReturn(ssp2).once();
    SSP ssp3 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, ROOT_PAGE_URL, pageMap.get(ROOT_PAGE_URL), null, HttpStatus.SC_OK)).andReturn(ssp3).once();
    expect(ssp3.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp3.getURI()).andReturn(ROOT_PAGE_URL).once();
    ssp3.setDOM(readFile("htmlFiles/root-page_1.html", false));
    ssp3.setPage(mockPage3);
    expect(mockContentDataService.saveOrUpdate(ssp3)).andReturn(ssp3).once();
    SSP ssp4 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, PAGE_1_URL, pageMap.get(PAGE_1_URL), null, HttpStatus.SC_OK)).andReturn(ssp4).once();
    expect(ssp4.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp4.getURI()).andReturn(PAGE_1_URL).once();
    ssp4.setDOM(readFile("htmlFiles/page-1_1.html", false));
    ssp4.setPage(mockPage4);
    expect(mockContentDataService.saveOrUpdate(ssp4)).andReturn(ssp4).once();
    SSP ssp5 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, PAGE_ACCESS_FORBIDDEN_URL, pageMap.get(PAGE_ACCESS_FORBIDDEN_URL), null, HttpStatus.SC_OK)).andReturn(ssp5).once();
    expect(ssp5.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp5.getURI()).andReturn(PAGE_ACCESS_FORBIDDEN_URL).once();
    ssp5.setDOM(readFile("htmlFiles/page-access-forbidden_1.html", false));
    ssp5.setPage(mockPage5);
    expect(mockContentDataService.saveOrUpdate(ssp5)).andReturn(ssp5).once();
    SSP ssp6 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, PAGE_1_URL, pageMap.get(PAGE_1_URL), null, HttpStatus.SC_OK)).andReturn(ssp6).once();
    expect(ssp6.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp6.getURI()).andReturn(PAGE_1_URL).once();
    ssp6.setDOM(readFile("htmlFiles/page-1_1.html", false));
    ssp6.setPage(mockPage6);
    expect(mockContentDataService.saveOrUpdate(ssp6)).andReturn(ssp6).once();
    SSP ssp7 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, PAGE_2_URL, pageMap.get(PAGE_2_URL), null, HttpStatus.SC_OK)).andReturn(ssp7).once();
    expect(ssp7.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp7.getURI()).andReturn(PAGE_2_URL).once();
    ssp7.setDOM(readFile("htmlFiles/page-2_1.html", false));
    ssp7.setPage(mockPage7);
    expect(mockContentDataService.saveOrUpdate(ssp7)).andReturn(ssp7).once();
    SSP ssp8 = createMock(SSP.class);
    expect(mockContentDataService.getSSP(date, PAGE_ACCESS_FORBIDDEN_URL, pageMap.get(PAGE_ACCESS_FORBIDDEN_URL), null, HttpStatus.SC_OK)).andReturn(ssp8).once();
    expect(ssp8.getHttpStatusCode()).andReturn(HttpStatus.SC_OK).once();
    expect(ssp8.getURI()).andReturn(PAGE_ACCESS_FORBIDDEN_URL).once();
    ssp8.setDOM(readFile("htmlFiles/page-access-forbidden_1.html", false));
    ssp8.setPage(mockPage8);
    expect(mockContentDataService.saveOrUpdate(ssp8)).andReturn(ssp8).once();
    // --- Site expectations: each of the eight pages is expected to be added
    // --- as a child exactly once, and the site URL to be read once.
    Site mockSite = createMock(Site.class);
    //        expect(mockSite.getLabel())
    //                .andReturn("My Test Label")
    //                .once();
    mockSite.addChild(mockPage1);
    expectLastCall().once();
    mockSite.addChild(mockPage2);
    expectLastCall().once();
    mockSite.addChild(mockPage3);
    expectLastCall().once();
    mockSite.addChild(mockPage4);
    expectLastCall().once();
    mockSite.addChild(mockPage5);
    expectLastCall().once();
    mockSite.addChild(mockPage6);
    expectLastCall().once();
    mockSite.addChild(mockPage7);
    expectLastCall().once();
    mockSite.addChild(mockPage8);
    expectLastCall().once();
    expect(mockSite.getURL()).andReturn(ROOT_PAGE_URL).once();
    //        expect(mockWebResourceDataService.saveOrUpdate(mockSite))
    //                .andReturn(mockSite)
    //                .times(8);
    // --- Switch every mock from recording to replay.
    replay(mockPage1);
    replay(mockPage2);
    replay(mockPage3);
    replay(mockPage4);
    replay(mockPage5);
    replay(mockPage6);
    replay(mockPage7);
    replay(mockPage8);
    replay(mockSite);
    replay(mockWebResourceDataService);
    replay(mockDateFactory);
    replay(mockContentDataService);
    replay(ssp1);
    replay(ssp2);
    replay(ssp3);
    replay(ssp4);
    replay(ssp5);
    replay(ssp6);
    replay(ssp7);
    replay(ssp8);
    // --- Code under test (currently disabled, see the NOTE(review) in the
    // --- method Javadoc).
    //        ScenarioLoaderImpl instance = new ScenarioLoaderImpl(
    //                mockSite,
    //                readFile("MyTest.json", true), 
    //                harFileContentLoaderFactory);
    //        instance.setContentDataService(mockContentDataService);
    //        instance.setContentFactory(mockContentFactory);
    //        instance.setDateFactory(mockDateFactory);
    //        instance.setWebResourceDataService(mockWebResourceDataService);
    //        
    //        instance.run(); 
    // --- Check that every recorded expectation was met.
    verify(mockPage1);
    verify(mockPage2);
    verify(mockPage3);
    verify(mockPage4);
    verify(mockPage5);
    verify(mockPage6);
    verify(mockPage7);
    verify(mockPage8);
    verify(mockSite);
    verify(mockWebResourceDataService);
    verify(mockDateFactory);
    verify(mockContentDataService);
    verify(ssp1);
    verify(ssp2);
    verify(ssp3);
    verify(ssp4);
    verify(ssp5);
    verify(ssp6);
    verify(ssp7);
    verify(ssp8);
}
Also used : Site(org.asqatasun.entity.subject.Site) SSP(org.asqatasun.entity.audit.SSP) WebResourceDataService(org.asqatasun.entity.service.subject.WebResourceDataService) Page(org.asqatasun.entity.subject.Page) DateFactory(org.asqatasun.util.factory.DateFactory) ContentDataService(org.asqatasun.entity.service.audit.ContentDataService)

Example 54 with Page

use of org.asqatasun.entity.subject.Page in project Asqatasun by Asqatasun.

the class ScenarioLoaderImpl method fireNewSSP.

/**
 * Creates and saves an SSP for a retrieved page, adds it to the result, and
 * saves one pre-process result per entry of the given script map.
 *
 * Nothing is saved when the source code is empty. The charset is extracted
 * from the source code; if the extraction fails with an IOException, the
 * default charset (UFT8) is kept and the failure is logged.
 *
 * @param url the URL of the retrieved page
 * @param sourceCode the source code of the retrieved page
 * @param snapshotContent currently unused (snapshot persistence is disabled)
 * @param jsScriptMap entries passed as (key, value) to the pre-process result
 *        service; a null map is treated like an empty one
 */
private void fireNewSSP(String url, String sourceCode, byte[] snapshotContent, Map<String, String> jsScriptMap) {
    LOGGER.debug("fire New SSP " + url);
    if (StringUtils.isEmpty(sourceCode)) {
        LOGGER.debug("Emtpy SSP " + url + " not saved");
        return;
    }
    String charset = UFT8;
    try {
        charset = CrawlUtils.extractCharset(IOUtils.toInputStream(sourceCode));
    } catch (IOException ex) {
        // Best effort: keep the default charset, but log through the shared
        // logger with the URL and the exception so the failure is traceable.
        LOGGER.warn("Unable to extract the charset of " + url + ", keeping default " + UFT8, ex);
    }
    Page page = getWebResource(url);
    SSP ssp = contentDataService.getSSP(dateFactory.createDate(), url, sourceCode, page, HttpStatus.SC_OK);
    ssp.setCharset(charset);
    contentDataService.saveOrUpdate(ssp);
    result.add(ssp);
    // Snapshot persistence is disabled: snapshotContent is intentionally ignored.
    if (jsScriptMap == null || jsScriptMap.isEmpty()) {
        // Nothing to pre-process; avoids a NullPointerException after the SSP
        // has already been saved.
        return;
    }
    // Use the audit of the page itself when set, otherwise the one of its parent.
    Audit audit = null;
    if (page.getAudit() != null) {
        audit = page.getAudit();
    } else if (page.getParent().getAudit() != null) {
        audit = page.getParent().getAudit();
    }
    for (Map.Entry<String, String> entry : jsScriptMap.entrySet()) {
        PreProcessResult ppr = preProcessResultDataService.getPreProcessResult(entry.getKey(), entry.getValue(), audit, page);
        preProcessResultDataService.saveOrUpdate(ppr);
    }
}
Also used : SSP(org.asqatasun.entity.audit.SSP) Audit(org.asqatasun.entity.audit.Audit) PreProcessResult(org.asqatasun.entity.audit.PreProcessResult) Page(org.asqatasun.entity.subject.Page) IOException(java.io.IOException) Map(java.util.Map)

Example 55 with Page

use of org.asqatasun.entity.subject.Page in project Asqatasun by Asqatasun.

the class ScenarioLoaderImpl method getWebResource.

/**
 * Resolves the page associated with a URL and saves it.
 *
 * When the current web resource is a Page, that page is reused: its URL is
 * updated if it differs from the given one and its rank is set to 1. When
 * it is a Site, a new page is created for the URL, ranked with the current
 * page rank counter (which is then incremented) and added as a child of
 * the site.
 *
 * @param url the URL of the page to resolve
 * @return the page instance for the given URL, as returned by the save
 */
private Page getWebResource(String url) {
    Page resolved = null;
    if (webResource instanceof Page) {
        resolved = (Page) webResource;
        boolean sameUrl = StringUtils.equals(url, webResource.getURL());
        if (!sameUrl) {
            webResource.setURL(url);
        }
        resolved.setRank(1);
    } else if (webResource instanceof Site) {
        Site site = (Site) webResource;
        resolved = webResourceDataService.createPage(url);
        resolved.setRank(pageRank);
        pageRank++;
        site.addChild(resolved);
    }
    // NOTE(review): when the web resource is neither a Page nor a Site, null
    // is handed to saveOrUpdate -- confirm that case cannot occur.
    return (Page) webResourceDataService.saveOrUpdate(resolved);
}
Also used : Site(org.asqatasun.entity.subject.Site) Page(org.asqatasun.entity.subject.Page)

Aggregations

Page (org.asqatasun.entity.subject.Page)77 Site (org.asqatasun.entity.subject.Site)68 WebResource (org.asqatasun.entity.subject.WebResource)7 ArrayList (java.util.ArrayList)6 Query (javax.persistence.Query)6 Audit (org.asqatasun.entity.audit.Audit)4 ProcessResult (org.asqatasun.entity.audit.ProcessResult)4 List (java.util.List)3 Map (java.util.Map)3 SSP (org.asqatasun.entity.audit.SSP)3 Content (org.asqatasun.entity.audit.Content)2 DefiniteResult (org.asqatasun.entity.audit.DefiniteResult)2 ForbiddenPageException (org.asqatasun.webapp.exception.ForbiddenPageException)2 ColumnBuilderException (ar.com.fdvs.dj.domain.builders.ColumnBuilderException)1 IOException (java.io.IOException)1 JRException (net.sf.jasperreports.engine.JRException)1 PreProcessResult (org.asqatasun.entity.audit.PreProcessResult)1 Scope (org.asqatasun.entity.reference.Scope)1 ContentDataService (org.asqatasun.entity.service.audit.ContentDataService)1 WebResourceDataService (org.asqatasun.entity.service.subject.WebResourceDataService)1