Search in sources :

Example 1 with NanopubHandler

use of org.nanopub.MultiNanopubRdfHandler.NanopubHandler in project nanopub-server by tkuhn.

the class CollectNanopubs method processPage.

/**
 * Processes one page of the peer's nanopub URI list.
 *
 * <p>The listed URIs are checked against the local database; nanopubs that match
 * {@code ourPattern} and are not yet stored are collected for download. If more than five
 * are missing (and this is not the last page), the complete page is fetched as one package,
 * otherwise the missing nanopubs are fetched one by one. Afterwards the reached position
 * ({@code nextNp}) is stored as the new peer state.
 *
 * @param page            number of the page to process; the position of its first entry is
 *                        computed as {@code (page - 1) * peerPageSize}
 * @param isLastPage      if {@code true}, the page is never downloaded as a package
 * @param ignoreBeforePos entries at positions below this value are skipped
 * @throws Exception if a request fails, a download takes too long, or a nanopub cannot be
 *                   parsed or loaded
 */
private void processPage(int page, boolean isLastPage, long ignoreBeforePos) throws Exception {
    parent.stillAlive();
    logger.info("Process page " + page + " from " + peerInfo.getPublicUrl());
    loaded = 0;
    nextNp = (page - 1) * peerPageSize;
    List<String> toLoad = new ArrayList<>();
    boolean downloadAsPackage = false;
    for (String nanopubUri : NanopubServerUtils.loadNanopubUriList(peerInfo, page)) {
        parent.stillAlive();
        if (nextNp >= ignoreBeforePos) {
            String ac = TrustyUriUtils.getArtifactCode(nanopubUri);
            if (ac != null && ourPattern.matchesUri(nanopubUri) && !db.hasNanopub(ac)) {
                toLoad.add(ac);
                if (!isLastPage && toLoad.size() > 5) {
                    // Download entire package if more than 5 nanopubs are new
                    downloadAsPackage = true;
                    // The package handler below counts every nanopub of the page itself,
                    // so the position is rewound to the start of the page.
                    nextNp = (page - 1) * peerPageSize;
                    break;
                }
            }
        }
        nextNp++;
    }
    RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(5 * 1000).build();
    HttpClient c = HttpClientBuilder.create().setDefaultRequestConfig(requestConfig).build();
    watch = new StopWatch();
    watch.start();
    try {
        if (downloadAsPackage) {
            logger.info("Download page " + page + " as compressed package...");
            try (InputStream in = openPackageStream(c, page)) {
                MultiNanopubRdfHandler.process(RDFFormat.TRIG, in, new NanopubHandler() {

                    @Override
                    public void handleNanopub(Nanopub np) {
                        nextNp++;
                        if (watch.getTime() > 5 * 60 * 1000) {
                            // Downloading the whole package should never take more than 5 minutes.
                            logger.error("Downloading package took too long; interrupting");
                            recordTime();
                            throw new RuntimeException("Downloading package took too long; interrupting");
                        }
                        if (!ourPattern.matchesUri(np.getUri().stringValue()))
                            return;
                        try {
                            loadNanopub(np);
                        } catch (Exception ex) {
                            throw new RuntimeException(ex);
                        }
                    }
                });
            }
        } else {
            logger.info("Download " + toLoad.size() + " nanopubs individually...");
            for (String ac : toLoad) {
                parent.stillAlive();
                HttpGet get = new HttpGet(peerInfo.getPublicUrl() + ac);
                get.setHeader("Accept", "application/trig");
                HttpResponse resp = c.execute(get);
                if (!wasSuccessful(resp)) {
                    logger.error("HTTP request failed: " + resp.getStatusLine().getReasonPhrase());
                    recordTime();
                    throw new RuntimeException(resp.getStatusLine().getReasonPhrase());
                }
                try (InputStream in = resp.getEntity().getContent()) {
                    loadNanopub(new NanopubImpl(in, RDFFormat.TRIG));
                }
            }
        }
    } finally {
        // The client is created per call, so it has to be closed here; otherwise its
        // connections (including those of failed responses) are never released.
        if (c instanceof java.io.Closeable) {
            try {
                ((java.io.Closeable) c).close();
            } catch (java.io.IOException ignored) {
                // Best-effort cleanup; must not mask an exception from the download itself.
            }
        }
    }
    recordTime();
    logger.info("Update peer state: " + peerInfo.getPublicUrl() + " at position " + nextNp);
    db.updatePeerState(peerInfo, nextNp);
}

/**
 * Requests the package for the given page and returns a stream of its TriG content.
 * The gzip-compressed package is tried first; if that request is not successful, the
 * uncompressed package is requested instead.
 *
 * @param c    client used to execute the requests
 * @param page number of the page whose package is requested
 * @return stream of the (decompressed) package content; the caller has to close it
 * @throws Exception if both requests fail or the response content cannot be opened
 */
private InputStream openPackageStream(HttpClient c, int page) throws Exception {
    HttpGet get = new HttpGet(peerInfo.getPublicUrl() + "package.gz?page=" + page);
    get.setHeader("Accept", "application/x-gzip");
    HttpResponse resp = c.execute(get);
    if (wasSuccessful(resp)) {
        try {
            return new GZIPInputStream(resp.getEntity().getContent());
        } catch (Exception ex) {
            // The gzip wrapper could not be created, so nobody else will close the content.
            discardQuietly(resp);
            throw ex;
        }
    }
    // Release the failed response before issuing the fallback request.
    discardQuietly(resp);
    logger.info("Failed. Trying uncompressed package...");
    // This is for compatibility with older versions; to be removed at some point...
    get = new HttpGet(peerInfo.getPublicUrl() + "package?page=" + page);
    get.setHeader("Accept", "application/trig");
    resp = c.execute(get);
    if (!wasSuccessful(resp)) {
        logger.error("HTTP request failed: " + resp.getStatusLine().getReasonPhrase());
        recordTime();
        throw new RuntimeException(resp.getStatusLine().getReasonPhrase());
    }
    return resp.getEntity().getContent();
}

/**
 * Closes the content stream of a response that is not going to be read, ignoring any
 * failure while doing so.
 *
 * @param resp response whose content is discarded
 */
private void discardQuietly(HttpResponse resp) {
    try {
        if (resp.getEntity() != null) {
            InputStream body = resp.getEntity().getContent();
            if (body != null) {
                body.close();
            }
        }
    } catch (java.io.IOException | RuntimeException ignored) {
        // Nothing useful can be done if releasing an unwanted response fails.
    }
}
Also used : RequestConfig(org.apache.http.client.config.RequestConfig) GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) HttpGet(org.apache.http.client.methods.HttpGet) Nanopub(org.nanopub.Nanopub) ArrayList(java.util.ArrayList) HttpResponse(org.apache.http.HttpResponse) StopWatch(org.apache.commons.lang.time.StopWatch) HttpClient(org.apache.http.client.HttpClient) NanopubHandler(org.nanopub.MultiNanopubRdfHandler.NanopubHandler) NanopubImpl(org.nanopub.NanopubImpl)

Example 2 with NanopubHandler

use of org.nanopub.MultiNanopubRdfHandler.NanopubHandler in project nanopub-server by tkuhn.

the class LoadFiles method checkFilesToLoad.

/**
 * Loads the nanopubs from every regular file found in {@code loadDir}.
 *
 * <p>Each file is first moved to {@code processingDir}, parsed in the RDF format derived
 * from its file name, and moved to {@code doneDir} once all nanopubs matching
 * {@code ourPattern} have been loaded into the database. A failure for one file is logged
 * and does not stop the processing of the remaining files.
 */
private void checkFilesToLoad() {
    logger.info("Check whether there are files to load...");
    File[] candidates = loadDir.listFiles();
    if (candidates == null) {
        // listFiles() returns null if loadDir is not a directory or cannot be read.
        logger.error("Cannot list files in load directory: " + loadDir);
        return;
    }
    for (File f : candidates) {
        stillAlive();
        if (f.isDirectory())
            continue;
        logger.info("Try to load file: " + f);
        try {
            final File processingFile = new File(processingDir, f.getName());
            if (!f.renameTo(processingFile)) {
                // Without this check, a stale file of the same name in processingDir
                // could be parsed instead of the file that was meant to be loaded.
                throw new java.io.IOException("Could not move file to " + processingFile);
            }
            RDFFormat format = Rio.getParserFormatForFileName(processingFile.getName());
            MultiNanopubRdfHandler.process(format, processingFile, new NanopubHandler() {

                @Override
                public void handleNanopub(Nanopub np) {
                    if (!ourPattern.matchesUri(np.getUri().toString()))
                        return;
                    try {
                        db.loadNanopub(np);
                    } catch (Exception ex) {
                        throw new RuntimeException(ex);
                    }
                    stillAlive();
                }
            });
            File doneFile = new File(doneDir, f.getName());
            if (!processingFile.renameTo(doneFile)) {
                // The content is loaded at this point; only the bookkeeping move failed.
                logger.error("Could not move loaded file " + processingFile + " to " + doneFile);
            }
            logger.info("File loaded: " + processingFile);
        } catch (Exception ex) {
            logger.error("Failed to load file: " + f, ex);
        }
    }
}
Also used : NanopubHandler(org.nanopub.MultiNanopubRdfHandler.NanopubHandler) Nanopub(org.nanopub.Nanopub) File(java.io.File) RDFFormat(org.openrdf.rio.RDFFormat)

Aggregations

NanopubHandler (org.nanopub.MultiNanopubRdfHandler.NanopubHandler)2 Nanopub (org.nanopub.Nanopub)2 File (java.io.File)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 StopWatch (org.apache.commons.lang.time.StopWatch)1 HttpResponse (org.apache.http.HttpResponse)1 HttpClient (org.apache.http.client.HttpClient)1 RequestConfig (org.apache.http.client.config.RequestConfig)1 HttpGet (org.apache.http.client.methods.HttpGet)1 NanopubImpl (org.nanopub.NanopubImpl)1 RDFFormat (org.openrdf.rio.RDFFormat)1