Search in sources:

Example 1 with Content

use of org.asqatasun.entity.audit.Content in project Asqatasun by Asqatasun.

the class CrawlerServiceImplTest method initialiseAndLaunchCrawl.

/**
 * Crawls {@code siteUrl} with a freshly created audit and returns the
 * contents read back from the mock content data service.
 *
 * @param siteUrl the URL passed to the crawler service
 * @param depth crawl parameter forwarded to {@code setCrawlParameters}
 * @param exlusionRegexp crawl parameter forwarded to {@code setCrawlParameters}
 * @param inlusionRegexp crawl parameter forwarded to {@code setCrawlParameters}
 * @param maxDuration crawl parameter forwarded to {@code setCrawlParameters}
 * @param maxDocuments crawl parameter forwarded to {@code setCrawlParameters}
 * @return the non-null contents found for the crawled web resource
 */
private List<Content> initialiseAndLaunchCrawl(String siteUrl, String depth, String exlusionRegexp, String inlusionRegexp, String maxDuration, String maxDocuments) {
    Audit audit = new AuditImpl();
    audit.setParameterSet(setCrawlParameters(depth, exlusionRegexp, inlusionRegexp, maxDuration, maxDocuments));
    WebResource site = crawlerService.crawlSite(audit, siteUrl);
    // Only ids associated with the OK status are requested.
    // NOTE(review): 0 and 10 look like a paging window (start, size) - confirm.
    Collection<Long> contentListId = mockContentDataService.getSSPIdsFromWebResource(site.getId(), HttpStatus.SC_OK, 0, 10);
    // Parameterized list: the original raw ArrayList triggered an unchecked warning.
    List<Content> contentList = new ArrayList<>();
    for (Long id : contentListId) {
        Content content = mockContentDataService.readWithRelatedContent(id, false);
        // Ids for which nothing can be read are skipped rather than added as null.
        if (content != null) {
            System.out.println(content.getURI() + "  " + content.getClass());
            contentList.add(content);
        }
    }
    return contentList;
}
Also used : AuditImpl(org.asqatasun.entity.audit.AuditImpl) Audit(org.asqatasun.entity.audit.Audit) Content(org.asqatasun.entity.audit.Content) WebResource(org.asqatasun.entity.subject.WebResource)

Example 2 with Content

use of org.asqatasun.entity.audit.Content in project Asqatasun by Asqatasun.

the class ScenarioLoaderServiceImplTest method testLoadScenario.

/**
 * Checks that ScenarioLoaderServiceImpl.loadScenario creates a loader through
 * the factory, runs it, fetches its result and saves the web resource.
 * The loader returns an empty list, and no call is recorded on the audit or
 * on the content data service, so any call on them fails the test.
 */
public void testLoadScenario() {
    System.out.println("loadScenario");
    final String scenario = "My Scenario";

    // Collaborators, all mocked.
    Audit auditMock = createMock(Audit.class);
    WebResource webResourceMock = createMock(WebResource.class);
    ScenarioLoader loaderMock = createMock(ScenarioLoader.class);
    ScenarioLoaderFactory loaderFactoryMock = createMock(ScenarioLoaderFactory.class);
    ContentDataService contentDataServiceMock = createMock(ContentDataService.class);
    WebResourceDataService webResourceDataServiceMock = createMock(WebResourceDataService.class);

    // Expected interactions, each exactly once.
    expect(webResourceMock.getAudit()).andReturn(auditMock).once();
    expect(loaderFactoryMock.create(webResourceMock, scenario)).andReturn(loaderMock).once();
    // run() is void: record the call, then qualify it with expectLastCall().
    loaderMock.run();
    expectLastCall().once();
    List<Content> emptyResult = new ArrayList<Content>();
    expect(loaderMock.getResult()).andReturn(emptyResult).once();
    expect(webResourceDataServiceMock.saveOrUpdate(webResourceMock)).andReturn(webResourceMock).once();

    // Switch every mock from record mode to replay mode.
    replay(auditMock);
    replay(webResourceMock);
    replay(webResourceDataServiceMock);
    replay(contentDataServiceMock);
    replay(loaderMock);
    replay(loaderFactoryMock);

    // Wire the service under test and exercise it.
    ScenarioLoaderServiceImpl service = new ScenarioLoaderServiceImpl();
    service.setContentDataService(contentDataServiceMock);
    service.setScenarioLoaderFactory(loaderFactoryMock);
    service.setWebResourceDataService(webResourceDataServiceMock);
    service.loadScenario(webResourceMock, scenario);

    // Every recorded expectation must have been met.
    verify(auditMock);
    verify(webResourceMock);
    verify(webResourceDataServiceMock);
    verify(contentDataServiceMock);
    verify(loaderMock);
    verify(loaderFactoryMock);
}
Also used : Audit(org.asqatasun.entity.audit.Audit) ScenarioLoader(org.asqatasun.scenarioloader.ScenarioLoader) Content(org.asqatasun.entity.audit.Content) WebResourceDataService(org.asqatasun.entity.service.subject.WebResourceDataService) ArrayList(java.util.ArrayList) WebResource(org.asqatasun.entity.subject.WebResource) ScenarioLoaderFactory(org.asqatasun.scenarioloader.ScenarioLoaderFactory) ContentDataService(org.asqatasun.entity.service.audit.ContentDataService)

Example 3 with Content

use of org.asqatasun.entity.audit.Content in project Asqatasun by Asqatasun.

the class ScenarioLoaderServiceImpl method loadScenario.

/**
 * Runs the scenario loader built for the given web resource, links each
 * resulting content to the web resource's audit, then saves the web resource.
 *
 * @param webResource the web resource whose audit is attached to the contents
 * @param scenarioFile the scenario handed to the scenario loader factory
 * @return the contents produced by the scenario loader
 */
@Override
public List<Content> loadScenario(WebResource webResource, String scenarioFile) {
    final Audit owningAudit = webResource.getAudit();
    final ScenarioLoader loader = scenarioLoaderFactory.create(webResource, scenarioFile);
    loader.run();
    final List<Content> loadedContents = loader.getResult();
    // The audit id is read once per content, so the audit is never touched
    // when the loader produced nothing.
    for (Content loaded : loadedContents) {
        contentDataService.saveAuditToContent(loaded.getId(), owningAudit.getId());
    }
    // The web resource is saved before the contents are handed back.
    webResourceDataService.saveOrUpdate(webResource);
    return loadedContents;
}
Also used : Audit(org.asqatasun.entity.audit.Audit) ScenarioLoader(org.asqatasun.scenarioloader.ScenarioLoader) Content(org.asqatasun.entity.audit.Content)

Example 4 with Content

use of org.asqatasun.entity.audit.Content in project Asqatasun by Asqatasun.

the class ContentsAdapterImpl method run.

/**
 * Adapts every SSP of the given collection whose HTTP status code is 200:
 * cleans its source, writes the cleaned DOM to a file, then optionally parses
 * it and optionally runs a second cleaning pass on the adapted content.
 *
 * @param contentList the contents to examine
 * @return the SSPs that were adapted; all other contents are left out
 */
private Collection<Content> run(Collection<Content> contentList) {
    Collection<Content> adapted = new ArrayList<>();
    for (Content candidate : contentList) {
        // Anything that is not a fetched SSP (HttpStatus=200) is ignored.
        if (!(candidate instanceof SSP) || candidate.getHttpStatusCode() != 200) {
            continue;
        }
        LOGGER.debug("Adapting " + candidate.getURI());
        SSP page = (SSP) candidate;

        // First pass with the shared cleaner; its input is reset afterwards.
        htmlCleaner.setDirtyHTML(page.getSource());
        htmlCleaner.run();
        page.setAdaptedContent(htmlCleaner.getResult());
        htmlCleaner.setDirtyHTML(null);
        writeCleanDomInFile(page);

        if (!parseAndRetrievelRelatedContent) {
            LOGGER.debug("no Html parse executed for the current audit");
        } else {
            htmlParser.setSSP(page);
            htmlParser.run();
        }

        if (xmlizeContent) {
            // Second pass with a dedicated cleaner over the adapted content.
            AbstractHTMLCleaner secondPass = new HTMLCleanerImpl();
            secondPass.setDirtyHTML(page.getAdaptedContent());
            secondPass.run();
            String cleaned = secondPass.getResult();
            page.setAdaptedContent(DocumentCaseInsensitiveAdapter.removeLowerCaseTags(cleaned));
        }
        adapted.add(page);
    }
    return adapted;
}
Also used : SSP(org.asqatasun.entity.audit.SSP) HTMLCleanerImpl(org.asqatasun.contentadapter.html.HTMLCleanerImpl) Content(org.asqatasun.entity.audit.Content) AbstractHTMLCleaner(org.asqatasun.contentadapter.html.AbstractHTMLCleaner)

Example 5 with Content

use of org.asqatasun.entity.audit.Content in project Asqatasun by Asqatasun.

the class ContentDAOImplTest method testFindOrphanContentList.

/**
 * Checks the orphan content and orphan related-content counts and lists
 * returned by the content DAO for the web resources with ids 1 and 2.
 */
@Test
public void testFindOrphanContentList() {
    // Web resource 1: two orphan contents, three orphan related contents.
    final WebResource firstResource = webresourceDAO.read(Long.valueOf(1));
    assertEquals(Long.valueOf(2), contentDAO.findNumberOfOrphanContentFromWebResource(firstResource));
    final List<Content> firstOrphans = contentDAO.findOrphanContentList(firstResource, 0, 100);
    assertEquals(2, firstOrphans.size());
    assertEquals(Long.valueOf(3), contentDAO.findNumberOfOrphanRelatedContentFromWebResource(firstResource));
    final List<Content> firstRelatedOrphans = contentDAO.findOrphanRelatedContentList(firstResource, 0, 10);
    assertEquals(3, firstRelatedOrphans.size());

    // Web resource 2: one orphan content, one orphan related content.
    final WebResource secondResource = webresourceDAO.read(Long.valueOf(2));
    final List<Content> secondOrphans = contentDAO.findOrphanContentList(secondResource, 0, 10);
    assertEquals(1, secondOrphans.size());
    final List<Content> secondRelatedOrphans = contentDAO.findOrphanRelatedContentList(secondResource, 0, 10);
    assertEquals(1, secondRelatedOrphans.size());
}
Also used : Content(org.asqatasun.entity.audit.Content) WebResource(org.asqatasun.entity.subject.WebResource) Test(org.junit.Test)

Aggregations

Content (org.asqatasun.entity.audit.Content): 15, WebResource (org.asqatasun.entity.subject.WebResource): 9, ArrayList (java.util.ArrayList): 7, Audit (org.asqatasun.entity.audit.Audit): 4, SSP (org.asqatasun.entity.audit.SSP): 4, ContentDataService (org.asqatasun.entity.service.audit.ContentDataService): 3, ContentLoader (org.asqatasun.contentloader.ContentLoader): 2, ContentLoaderFactory (org.asqatasun.contentloader.ContentLoaderFactory): 2, AuditImpl (org.asqatasun.entity.audit.AuditImpl): 2, RelatedContent (org.asqatasun.entity.audit.RelatedContent): 2, Page (org.asqatasun.entity.subject.Page): 2, Site (org.asqatasun.entity.subject.Site): 2, ScenarioLoader (org.asqatasun.scenarioloader.ScenarioLoader): 2, DateFactory (org.asqatasun.util.factory.DateFactory): 2, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 1, NoSuchAlgorithmException (java.security.NoSuchAlgorithmException): 1, HashMap (java.util.HashMap): 1, PersistenceException (javax.persistence.PersistenceException): 1, AbstractHTMLCleaner (org.asqatasun.contentadapter.html.AbstractHTMLCleaner): 1, HTMLCleanerImpl (org.asqatasun.contentadapter.html.HTMLCleanerImpl): 1