Search in sources :

Example 1 with Sitemapindex

Use of nl.knaw.huygens.timbuctoo.remote.rs.xml.Sitemapindex in project timbuctoo by HuygensING.

From the class ResultIndexPivotTest, method testSelections.

/**
 * Exercises the selection methods of {@code ResultIndexPivot} against an index holding five
 * results that differ in content type (Urlset or Sitemapindex), capability and error state.
 */
@SuppressWarnings("unchecked")
@Test
public void testSelections() throws Exception {
    final ResultIndex resultIndex = new ResultIndex();

    // doc1: Urlset with capability resourcelist, carrying an error as well as content.
    Result<Urlset> resourceListSet = new Result<>(new URI("doc1"));
    resourceListSet.accept(new Urlset(new RsMd(Capability.RESOURCELIST.xmlValue)));
    resourceListSet.addError(new RemoteResourceSyncFrameworkException("Bla1"));
    resultIndex.add(resourceListSet);

    // doc2: Urlset with capability capabilitylist, no errors.
    Result<Urlset> capabilityListSet = new Result<>(new URI("doc2"));
    capabilityListSet.accept(new Urlset(new RsMd(Capability.CAPABILITYLIST.xmlValue)));
    resultIndex.add(capabilityListSet);

    // doc3: Sitemapindex with capability changelist, no errors.
    Result<Sitemapindex> changeListIndex = new Result<>(new URI("doc3"));
    changeListIndex.accept(new Sitemapindex(new RsMd(Capability.CHANGELIST.xmlValue)));
    resultIndex.add(changeListIndex);

    // doc4: only an error, no content was ever accepted.
    Result<Sitemapindex> failedIndex = new Result<>(new URI("doc4"));
    failedIndex.addError(new RemoteResourceSyncFrameworkException("Bla4"));
    resultIndex.add(failedIndex);

    // doc5: Sitemapindex with capability capabilitylist, no errors.
    Result<Sitemapindex> capabilityListIndex = new Result<>(new URI("doc5"));
    capabilityListIndex.accept(new Sitemapindex(new RsMd(Capability.CAPABILITYLIST.xmlValue)));
    resultIndex.add(capabilityListIndex);

    ResultIndexPivot pivot = new ResultIndexPivot(resultIndex);

    // Errors, first as bare throwables, then as the results that own them.
    List<Throwable> errors = pivot.listErrors();
    List<String> errorMessages = errors.stream().map(Throwable::getMessage).collect(Collectors.toList());
    assertThat(errorMessages, containsInAnyOrder("Bla1", "Bla4"));
    assertThat(errors.size(), equalTo(2));
    List<Result<?>> resultsWithErrors = pivot.listErrorResults();
    assertThat(resultsWithErrors, containsInAnyOrder(resourceListSet, failedIndex));
    assertThat(resultsWithErrors.size(), equalTo(2));

    // Every result that has content; doc4 is expected to be absent.
    List<Result<?>> resultsWithContent = pivot.listResultsWithContent();
    assertThat(resultsWithContent,
        containsInAnyOrder(resourceListSet, capabilityListSet, changeListIndex, capabilityListIndex));
    assertThat(resultsWithContent.size(), equalTo(4));

    // Selection by content type.
    List<Result<Urlset>> urlsetResults = pivot.listUrlsetResults();
    assertThat(urlsetResults, containsInAnyOrder(resourceListSet, capabilityListSet));
    assertThat(urlsetResults.size(), equalTo(2));
    List<Result<Sitemapindex>> sitemapindexResults = pivot.listSitemapindexResults();
    assertThat(sitemapindexResults.size(), equalTo(2));
    assertThat(sitemapindexResults, containsInAnyOrder(changeListIndex, capabilityListIndex));

    // Selection by content type combined with capability.
    List<Result<Urlset>> capabilityUrlsets = pivot.listUrlsetResults(Capability.CAPABILITYLIST);
    assertThat(capabilityUrlsets.size(), equalTo(1));
    assertThat(capabilityUrlsets, containsInAnyOrder(capabilityListSet));
    List<Result<Sitemapindex>> capabilitySitemapindexes = pivot.listSitemapindexResults(Capability.CAPABILITYLIST);
    assertThat(capabilitySitemapindexes.size(), equalTo(1));
    assertThat(capabilitySitemapindexes, containsInAnyOrder(capabilityListIndex));
    List<Result<RsRoot>> capabilityRoots = pivot.listRsRootResults(Capability.CAPABILITYLIST);
    assertThat(capabilityRoots.size(), equalTo(2));
    assertThat(capabilityRoots, containsInAnyOrder(capabilityListSet, capabilityListIndex));

    // Selection by level: only levels 1 and 2 are expected to yield results for this fixture.
    List<Result<RsRoot>> levelZero = pivot.listRsRootResultsByLevel(0);
    assertThat(levelZero.size(), equalTo(0));

    List<Result<RsRoot>> levelOne = pivot.listRsRootResultsByLevel(1);
    assertThat(levelOne.size(), equalTo(2));
    assertThat(levelOne, containsInAnyOrder(resourceListSet, changeListIndex));

    List<Result<RsRoot>> levelTwo = pivot.listRsRootResultsByLevel(2);
    assertThat(levelTwo.size(), equalTo(2));
    assertThat(levelTwo, containsInAnyOrder(capabilityListSet, capabilityListIndex));

    List<Result<RsRoot>> levelThree = pivot.listRsRootResultsByLevel(3);
    assertThat(levelThree.size(), equalTo(0));

    List<Result<RsRoot>> negativeLevel = pivot.listRsRootResultsByLevel(-1);
    assertThat(negativeLevel.size(), equalTo(0));
}
Also used : URI(java.net.URI) Urlset(nl.knaw.huygens.timbuctoo.remote.rs.xml.Urlset) RsMd(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsMd) Sitemapindex(nl.knaw.huygens.timbuctoo.remote.rs.xml.Sitemapindex) Test(org.junit.Test)

Example 2 with Sitemapindex

Use of nl.knaw.huygens.timbuctoo.remote.rs.xml.Sitemapindex in project timbuctoo by HuygensING.

From the class RsExplorer, method explore.

/**
 * Explores the document at {@code uri} and, depending on the configured flags, the documents it
 * links to.
 *
 * <p>The result of {@code execute(uri, getSitemapConverter())} is added to {@code index} before
 * any links are followed. Links are then followed recursively in this order:
 * <ol>
 *   <li>the {@code rel="up"} link, when {@code followParentLinks} is set;</li>
 *   <li>the {@code rel="index"} link, when {@code followIndexLinks} is set;</li>
 *   <li>the locations of the items in the document, when {@code followChildLinks} is set and the
 *       document is a {@link Sitemapindex} or its capability level is above that of
 *       {@code Capability.RESOURCELIST};</li>
 * </ol>
 * and finally the document's own {@code describedBy} link is handed to
 * {@code loadDescriptionIfApplicable}. A link that {@code index} already contains is not followed
 * again. A link that is not a syntactically valid URI is recorded on both {@code index} and the
 * returned result instead of being followed.
 *
 * @param uri the location of the document to explore
 * @param index the index collecting every result produced during exploration
 * @return the result for {@code uri}, with the explored parents and children attached
 */
@SuppressWarnings("unchecked")
@Override
public Result<RsRoot> explore(URI uri, ResultIndex index) {
    // Parameterized form: the message is only rendered when debug logging is enabled.
    LOG.debug("Exploring URI {}", uri);
    Result<RsRoot> result = execute(uri, getSitemapConverter());
    index.add(result);
    Capability capability = extractCapability(result);
    if (followParentLinks) {
        // rs:ln rel="up" -> points to parent document, a urlset.
        String parentLink = result.getContent().map(rsRoot -> rsRoot.getLinkHref("up")).orElse(null);
        if (parentLink != null && !index.contains(parentLink)) {
            try {
                URI parentUri = new URI(parentLink);
                Result<RsRoot> parentResult = explore(parentUri, index);
                result.addParent(parentResult);
                verifyUpRelation(result, parentResult, capability);
            } catch (URISyntaxException e) {
                recordInvalidUri(result, index, parentLink, e);
            }
        }
    }
    if (followIndexLinks) {
        // rs:ln rel="index" -> points to parent index, a sitemapindex.
        String indexLink = result.getContent().map(rsRoot -> rsRoot.getLinkHref("index")).orElse(null);
        if (indexLink != null && !index.contains(indexLink)) {
            try {
                URI indexUri = new URI(indexLink);
                Result<RsRoot> indexResult = explore(indexUri, index);
                result.addParent(indexResult);
                verifyIndexRelation(result, indexResult, capability);
            } catch (URISyntaxException e) {
                recordInvalidUri(result, index, indexLink, e);
            }
        }
    }
    if (followChildLinks) {
        // Elements <url> or <sitemap> hold the locations of the children of result.
        // Children of a Urlset with capability resourcelist, resourcedump, changelist or changedump
        // are the resources themselves; do not explore these with this explorer.
        String xmlString = result.getContent().map(RsRoot::getMetadata).flatMap(RsMd::getCapability).orElse("invalid");
        boolean isSitemapindex = result.getContent().map(rsRoot -> rsRoot instanceof Sitemapindex).orElse(false);
        if (Capability.levelfor(xmlString) > Capability.RESOURCELIST.level || isSitemapindex) {
            List<RsItem> itemList = result.getContent().map(RsRoot::getItemList).orElse(Collections.emptyList());
            for (RsItem item : itemList) {
                String childLink = item.getLoc();
                if (childLink != null && !index.contains(childLink)) {
                    try {
                        URI childUri = new URI(childLink);
                        Result<RsRoot> childResult = explore(childUri, index);
                        result.addChild(childResult);
                        verifyChildRelation(result, childResult, capability);
                        // The item itself may point at a description of the child document.
                        Optional<RsLn> maybeDescribedByLink = item.getLink("describedBy");
                        maybeDescribedByLink.ifPresent(rsLn -> loadDescriptionIfApplicable(childResult, rsLn, index));
                    } catch (URISyntaxException e) {
                        recordInvalidUri(result, index, childLink, e);
                    }
                }
            }
        }
    }
    // Independent of the follow-flags: the document's own describedBy link.
    Optional<RsLn> maybeDescribedByLink = result.getContent().flatMap(rsRoot -> rsRoot.getLink("describedBy"));
    maybeDescribedByLink.ifPresent(rsLn -> loadDescriptionIfApplicable(result, rsLn, index));
    return result;
}

/**
 * Records {@code link} as an invalid URI on both {@code index} and {@code result}, and attaches
 * {@code cause} to {@code result} as an error.
 */
private void recordInvalidUri(Result<RsRoot> result, ResultIndex index, String link, URISyntaxException cause) {
    index.addInvalidUri(link);
    result.addError(cause);
    result.addInvalidUri(link);
}
Also used : Capability(nl.knaw.huygens.timbuctoo.remote.rs.xml.Capability) LambdaExceptionUtil(nl.knaw.huygens.timbuctoo.util.LambdaExceptionUtil) CloseableHttpClient(org.apache.http.impl.client.CloseableHttpClient) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) URISyntaxException(java.net.URISyntaxException) RsMd(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsMd) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) Sitemapindex(nl.knaw.huygens.timbuctoo.remote.rs.xml.Sitemapindex) RsRoot(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsRoot) RsLn(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsLn) Urlset(nl.knaw.huygens.timbuctoo.remote.rs.xml.Urlset) HttpResponse(org.apache.http.HttpResponse) Optional(java.util.Optional) ResourceSyncContext(nl.knaw.huygens.timbuctoo.remote.rs.xml.ResourceSyncContext) RsItem(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsItem) RsBuilder(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsBuilder) URI(java.net.URI) Collections(java.util.Collections) InputStream(java.io.InputStream) Capability(nl.knaw.huygens.timbuctoo.remote.rs.xml.Capability) RsLn(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsLn) RsRoot(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsRoot) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) RsItem(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsItem) Sitemapindex(nl.knaw.huygens.timbuctoo.remote.rs.xml.Sitemapindex)

Aggregations

URI (java.net.URI)2 RsMd (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsMd)2 Sitemapindex (nl.knaw.huygens.timbuctoo.remote.rs.xml.Sitemapindex)2 Urlset (nl.knaw.huygens.timbuctoo.remote.rs.xml.Urlset)2 InputStream (java.io.InputStream)1 URISyntaxException (java.net.URISyntaxException)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 List (java.util.List)1 Optional (java.util.Optional)1 Capability (nl.knaw.huygens.timbuctoo.remote.rs.xml.Capability)1 ResourceSyncContext (nl.knaw.huygens.timbuctoo.remote.rs.xml.ResourceSyncContext)1 RsBuilder (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsBuilder)1 RsItem (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsItem)1 RsLn (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsLn)1 RsRoot (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsRoot)1 LambdaExceptionUtil (nl.knaw.huygens.timbuctoo.util.LambdaExceptionUtil)1 IOUtils (org.apache.commons.io.IOUtils)1 HttpResponse (org.apache.http.HttpResponse)1 CloseableHttpClient (org.apache.http.impl.client.CloseableHttpClient)1