Search in sources :

Example 1 with LogEntry

use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry in project timbuctoo by HuygensING.

the class ImportManager method processLogsUntil.

/**
 * Walks the unprocessed entries of the log list, up to and including position <code>maxIndex</code>,
 * and feeds each of them to the subscribed {@link RdfProcessor}s.
 *
 * <p>An entry is handled in one of two ways:
 * <ul>
 *   <li>The entry already has a log token: the stored log is parsed once for every subscribed processor
 *   whose current version is not beyond the entry's index. Failures are logged and recorded on
 *   <code>importStatus</code>; the entry is marked as processed afterwards either way.</li>
 *   <li>The entry has no log token: its {@link RdfCreator} writes the quads to a gzipped temp file, the
 *   file is saved through <code>logStorage</code>, the entry is replaced by a copy that carries the new
 *   token, and the iterator is stepped back so that the next loop iteration takes the first path for
 *   the same entry.</li>
 * </ul>
 *
 * <p>When at least one token-carrying entry ran through without an exception the webhooks are run.
 *
 * @param maxIndex highest list index (inclusive) that will be picked up by this call
 * @return the <code>importStatus</code> field of this instance, updated with progress and errors
 */
private synchronized ImportStatus processLogsUntil(int maxIndex) {
    importStatus.start(this.getClass().getSimpleName() + ".processLogs", null);
    ListIterator<LogEntry> unprocessed = logListStore.getData().getUnprocessed();
    boolean dataWasAdded = false;
    while (unprocessed.hasNext() && unprocessed.nextIndex() <= maxIndex) {
        // remember the position before advancing; it is used as version for the processors
        // and as the index that gets marked as processed
        int index = unprocessed.nextIndex();
        LogEntry entry = unprocessed.next();
        importStatus.startEntry(entry);
        if (entry.getLogToken().isPresent()) {
            // the entry has a stored log: import it into every processor that still needs it
            String logToken = entry.getLogToken().get();
            try (CachedLog log = logStorage.getLog(logToken)) {
                final Stopwatch stopwatch = Stopwatch.createStarted();
                for (RdfProcessor processor : subscribedProcessors) {
                    // processors whose version is already past this index are skipped
                    if (processor.getCurrentVersion() <= index) {
                        String msg = "******* " + processor.getClass().getSimpleName() + " Started importing full log...";
                        LOG.info(msg);
                        importStatus.setStatus(msg);
                        RdfParser rdfParser = serializerFactory.makeRdfParser(log);
                        processor.start(index);
                        rdfParser.importRdf(log, entry.getBaseUri(), entry.getDefaultGraph(), processor);
                        processor.commit();
                    }
                }
                long elapsedTime = stopwatch.elapsed(TimeUnit.SECONDS);
                String msg = "Finished importing. Total import took " + elapsedTime + " seconds.";
                LOG.info(msg);
                importStatus.setStatus(msg);
                // NOTE(review): set whenever the block completes without an exception, also when every
                // processor was skipped above
                dataWasAdded = true;
            } catch (Exception e) {
                // a failing import is recorded but does not stop the loop
                LOG.error("Processing log failed", e);
                importStatus.addError("Processing log failed", e);
            }
            // Update the log, even after RdfProcessingFailedException | IOException
            try {
                logListStore.updateData(logList -> {
                    logList.markAsProcessed(index);
                    return logList;
                });
            } catch (IOException e) {
                LOG.error("Updating the log failed", e);
                importStatus.addError("Updating log failed", e);
            }
        } else {
            // the entry has no stored log yet: generate one from its RdfCreator
            // NOTE(review): get() is called without an isPresent() check; an entry with neither a token
            // nor a creator would throw here, outside of any try block
            RdfCreator creator = entry.getRdfCreator().get();
            // token is only non-empty between a successful saveLog and the replacement of the entry,
            // which lets the outer catch report a log that was saved but never attached to the list
            String token = "";
            MediaType mediaType;
            Optional<Charset> charset;
            File tempFile = null;
            try {
                // NOTE(review): the suffix has no leading dot, so the file name ends in "nq" rather than ".nq"
                tempFile = File.createTempFile("log_to_generate", "nq");
                try (OutputStream stream = new GZIPOutputStream(new FileOutputStream(tempFile))) {
                    if (creator instanceof PlainRdfCreator) {
                        try (RdfSerializer serializer = serializerFactory.makeRdfSerializer(stream)) {
                            mediaType = serializer.getMediaType();
                            charset = Optional.of(serializer.getCharset());
                            ((PlainRdfCreator) creator).sendQuads(serializer, dataSet, importStatus::setStatus);
                        } catch (Exception e) {
                            LOG.error("Log generation failed", e);
                            importStatus.addError("Log generation failed", e);
                            // leaves the while loop; importStatus.finishEntry() is not called for this entry
                            break;
                        }
                    } else {
                        // every creator that is not a PlainRdfCreator is cast to PatchRdfCreator
                        try (RdfPatchSerializer srlzr = serializerFactory.makeRdfPatchSerializer(stream, entry.getBaseUri())) {
                            mediaType = srlzr.getMediaType();
                            charset = Optional.of(srlzr.getCharset());
                            ((PatchRdfCreator) creator).sendQuads(srlzr, importStatus::setStatus, dataSet);
                        } catch (Exception e) {
                            LOG.error("Log generation failed", e);
                            importStatus.addError("Log generation failed", e);
                            // leaves the while loop; importStatus.finishEntry() is not called for this entry
                            break;
                        }
                    }
                }
                // read the generated file back (unzipped) and hand it to the log storage
                try (InputStream inputStream = new GZIPInputStream(new FileInputStream(tempFile))) {
                    token = logStorage.saveLog(inputStream, "log_generated_by_" + creator.getClass().getSimpleName(), mediaType, charset);
                }
                LogEntry entryWithLog;
                entryWithLog = LogEntry.addLogToEntry(entry, token);
                // replace the entry that was just returned by next() with the token-carrying copy
                unprocessed.set(entryWithLog);
                token = "";
                // move back to process this item again
                unprocessed.previous();
            } catch (Exception e) {
                if (token.isEmpty()) {
                    LOG.error("Log processing failed", e);
                } else {
                    LOG.error("Log processing failed. Log created but not added to the list!", e);
                }
                importStatus.addError("Log processing failed", e);
                // leaves the while loop; importStatus.finishEntry() is not called for this entry
                break;
            } finally {
                // the temp file is removed on every path, including the break statements above
                if (tempFile != null) {
                    tempFile.delete();
                }
            }
        }
        // reached for both kinds of entries unless one of the break statements was hit
        importStatus.finishEntry();
    }
    // end main while loop
    if (dataWasAdded) {
        webhooks.run();
    }
    importStatus.finishList();
    // update log.json
    // presumably the identity update only serves to make the store write its current state — TODO confirm
    try {
        logListStore.updateData(Function.identity());
    } catch (IOException e) {
        LOG.error("Updating the log failed", e);
        importStatus.addError("Updating log failed", e);
    }
    return importStatus;
}
Also used : OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) Stopwatch(com.google.common.base.Stopwatch) GZIPInputStream(java.util.zip.GZIPInputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) MediaType(javax.ws.rs.core.MediaType) CachedLog(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedLog) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) GZIPInputStream(java.util.zip.GZIPInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) LogStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.LogStorageFailedException) IOException(java.io.IOException) FileStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.FileStorageFailedException) FileInputStream(java.io.FileInputStream) RdfSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer) RdfPatchSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfPatchSerializer) FileOutputStream(java.io.FileOutputStream) RdfCreator(nl.knaw.huygens.timbuctoo.v5.dataset.dto.RdfCreator) CachedFile(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile) File(java.io.File) RdfParser(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfParser)

Example 2 with LogEntry

use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry in project timbuctoo by HuygensING.

the class ImportStatusTest method jsonSerialization.

/**
 * Errors added before an entry is started end up on the list, errors added while an entry is active
 * end up on that entry, and the resulting status serializes to JSON with its type marker.
 */
@Test
public void jsonSerialization() throws Exception {
    final String listErrorMessage = "This error is recorded in logList";
    final String entryErrorMessage = "This error is recorded in logEntry";

    LogList entries = new LogList();
    LogEntry logEntry = LogEntry.create("baseUri", "defaultGraph", "token");
    entries.addEntry(logEntry);
    ImportStatus importStatus = new ImportStatus(entries);

    // no entry active yet: the error belongs to the list
    importStatus.start("method", "baseUri");
    assertThat(importStatus.getStatus(), is("Started method"));
    importStatus.addError(listErrorMessage, new RuntimeException("list"));
    assertThat(importStatus.getStatus().contains(listErrorMessage), is(true));
    assertThat(entries.getListErrors().get(0).contains(listErrorMessage), is(true));

    // entry active: the error belongs to the entry
    importStatus.startEntry(logEntry);
    assertThat(importStatus.getStatus(), is("Adding entry with token token"));
    importStatus.addError(entryErrorMessage, new RuntimeException("entry"));
    assertThat(importStatus.getStatus().contains(entryErrorMessage), is(true));
    assertThat(logEntry.getImportStatus().get().getErrors().get(0).contains(entryErrorMessage), is(true));

    importStatus.finishEntry();
    importStatus.finishList();

    ObjectMapper mapper = new ObjectMapper();
    mapper.enable(SerializationFeature.INDENT_OUTPUT);
    String serialized = mapper.writeValueAsString(importStatus);
    assertThat(serialized.contains("\"@type\" : \"ImportStatus\""), is(true));
}
Also used : LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 3 with LogEntry

use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry in project timbuctoo by HuygensING.

the class RsDocumentBuilder method getResourceList.

/**
 * Get the resource list for the dataSet denoted by <code>ownerId</code> and <code>dataSetId</code>.
 * The {@link Optional} is empty if the dataSet is not published and the given <code>user</code> == <code>null</code>
 * or has no read access for the dataSet or the dataSet does not exist.
 *
 * <p>The list contains one item for every log entry that has a log token for which the file storage
 * returns a file. Entries are visited in order of import date; entries without an import status come first.
 *
 * @param user User that requests the list, may be <code>null</code>
 * @param ownerId ownerId
 * @param dataSetId dataSetId
 * @return the resource list for the dataSet denoted by <code>ownerId</code> and <code>dataSetId</code>
 * @throws IOException propagated from the dataSet and file storage lookups done while building the list
 */
public Optional<Urlset> getResourceList(@Nullable User user, String ownerId, String dataSetId) throws IOException {
    Optional<DataSet> maybeDataSet = dataSetRepository.getDataSet(user, ownerId, dataSetId);
    if (!maybeDataSet.isPresent()) {
        return Optional.empty();
    }
    DataSet dataSet = maybeDataSet.get();
    DataSetMetaData dataSetMetaData = dataSet.getMetadata();
    LogList loglist = dataSet.getImportManager().getLogList();
    // lastImportDate set on server startup?
    RsMd rsMd = new RsMd(Capability.RESOURCELIST.xmlValue).withAt(ZonedDateTime.parse(loglist.getLastImportDate()));
    Urlset resourceList = new Urlset(rsMd).addLink(new RsLn(REL_UP, rsUriHelper.uriForRsDocument(dataSetMetaData, Capability.CAPABILITYLIST)));
    FileStorage fileStorage = dataSet.getFileStorage();
    // Sort a copy: the entries are addressed by their position elsewhere, so the list handed out by the
    // LogList must keep its order. NOTE(review): assumes getEntries() may expose the internal list — confirm.
    List<LogEntry> entries = new java.util.ArrayList<>(loglist.getEntries());
    entries.sort((e1, e2) -> {
        boolean firstHasStatus = e1.getImportStatus().isPresent();
        boolean secondHasStatus = e2.getImportStatus().isPresent();
        if (firstHasStatus && secondHasStatus) {
            return e1.getImportStatus().get().getDate().compareTo(e2.getImportStatus().get().getDate());
        }
        if (firstHasStatus == secondHasStatus) {
            // neither has a status: they are equal. Returning -1 here (as before) made compare(a, b) and
            // compare(b, a) both negative, which breaks the Comparator contract.
            return 0;
        }
        // exactly one has a status: the one without sorts first
        return firstHasStatus ? 1 : -1;
    });
    for (LogEntry logEntry : entries) {
        Optional<String> maybeToken = logEntry.getLogToken();
        if (maybeToken.isPresent()) {
            String loc = rsUriHelper.uriForToken(dataSetMetaData, maybeToken.get());
            Optional<CachedFile> maybeCachedFile = fileStorage.getFile(maybeToken.get());
            // tokens without a file in the storage are left out of the list
            if (maybeCachedFile.isPresent()) {
                UrlItem item = new UrlItem(loc).withMetadata(new RsMd().withType(maybeCachedFile.get().getMimeType().toString()));
                resourceList.addItem(item);
            }
        }
    }
    rsMd.withCompleted(ZonedDateTime.now(ZoneOffset.UTC));
    return Optional.of(resourceList);
}
Also used : CachedFile(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile) DataSet(nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSet) RsLn(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsLn) UrlItem(nl.knaw.huygens.timbuctoo.remote.rs.xml.UrlItem) LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) Urlset(nl.knaw.huygens.timbuctoo.remote.rs.xml.Urlset) FileStorage(nl.knaw.huygens.timbuctoo.v5.filestorage.FileStorage) DataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSetMetaData) RsMd(nl.knaw.huygens.timbuctoo.remote.rs.xml.RsMd) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry)

Example 4 with LogEntry

use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry in project timbuctoo by HuygensING.

the class ImportManagerTest method addLogSavesTheLogToDisk.

/**
 * Adding a log through the import manager stores the uploaded file, so that it can be read back
 * from the file storage using the token of the resulting log entry.
 */
@Test
public void addLogSavesTheLogToDisk() throws Exception {
    File file = FileHelpers.getFileFromResource(ImportManagerTest.class, "clusius.ttl").toFile();
    String name = "http://example.com/clusius.ttl";
    String defaultGraph = "http://example.com/defaultGraph";
    String baseUri = "http://example.com/baseUri";
    ImportStatus status;
    // keep the upload stream open until the import has finished, then release the file handle
    try (FileInputStream rdfStream = new FileInputStream(file)) {
        Future<ImportStatus> promise = importManager.addLog(baseUri, defaultGraph, name, rdfStream, Optional.of(Charsets.UTF_8), MediaType.valueOf("text/turtle"));
        status = promise.get();
    }
    assertThat(status.getErrorCount(), is(0));
    LogEntry logEntry = importManager.getLogEntries().get(0);
    assertThat(logEntry.getBaseUri(), is(baseUri));
    assertThat(logEntry.getDefaultGraph(), is(defaultGraph));
    // The first character is an @. if we can read that we apparently can access the file
    assertThat(fileStorage.getLog(logEntry.getLogToken().get()).getReader().read(), is(64));
}
Also used : CachedFile(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile) File(java.io.File) FileInputStream(java.io.FileInputStream) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) Test(org.junit.Test)

Example 5 with LogEntry

use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry in project timbuctoo by HuygensING.

the class ImportManagerTest method generateLogSavesTheLogAndCallsTheStores.

/**
 * Generating a log from an RdfCreator stores the generated log and pushes its content
 * through the subscribed processor.
 */
@Test
public void generateLogSavesTheLogAndCallsTheStores() throws Exception {
    String defaultGraph = "http://example.com/defaultGraph";
    String baseUri = "http://example.com/baseUri";
    CountingProcessor countingProcessor = new CountingProcessor();
    importManager.subscribeToRdf(countingProcessor);

    // wait for the generated log to be imported
    ImportStatus importStatus = importManager.generateLog(baseUri, defaultGraph, new DummyRdfCreator()).get();
    assertThat(importStatus.hasErrors(), is(false));
    assertThat(countingProcessor.getCounter(), is(3));

    LogEntry generatedEntry = importManager.getLogEntries().get(0);
    assertThat(generatedEntry.getBaseUri(), is(baseUri));
    assertThat(generatedEntry.getDefaultGraph(), is(defaultGraph));

    // The first character is an < (start of a uri in nquads) if we can read that we apparently can access the file
    String generatedToken = generatedEntry.getLogToken().get();
    int firstCharacter = fileStorage.getLog(generatedToken).getReader().read();
    assertThat(firstCharacter, is(60));
}
Also used : LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) Test(org.junit.Test)

Aggregations

LogEntry (nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry)5 CachedFile (nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile)3 Test (org.junit.Test)3 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 LogList (nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 Stopwatch (com.google.common.base.Stopwatch)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 Charset (java.nio.charset.Charset)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 MediaType (javax.ws.rs.core.MediaType)1 RsLn (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsLn)1 RsMd (nl.knaw.huygens.timbuctoo.remote.rs.xml.RsMd)1 UrlItem (nl.knaw.huygens.timbuctoo.remote.rs.xml.UrlItem)1 Urlset (nl.knaw.huygens.timbuctoo.remote.rs.xml.Urlset)1