
Example 1 with LogList

Use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList in project timbuctoo by HuygensING.

From class ImportManager, method addLog:

public Future<ImportStatus> addLog(String baseUri, String defaultGraph, String fileName, InputStream rdfInputStream, Optional<Charset> charset, MediaType mediaType) throws LogStorageFailedException {
    importStatus.start(this.getClass().getSimpleName() + ".addLog", baseUri);
    // single-element array so the update lambda below can report the new entry's index
    int[] index = new int[1];
    try {
        String token = logStorage.saveLog(rdfInputStream, fileName, mediaType, charset);
        logListStore.updateData(logList -> {
            index[0] = logList.addEntry(LogEntry.create(baseUri, defaultGraph, token));
            return logList;
        });
    } catch (IOException e) {
        importStatus.addError("Could not save log", e);
        throw new LogStorageFailedException(e);
    }
    // process asynchronously, up to and including the entry that was just added
    return executorService.submit(() -> processLogsUntil(index[0]));
}
Also used: LogStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.LogStorageFailedException) IOException(java.io.IOException)
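
For orientation, a minimal sketch of how addLog might be invoked, assuming an ImportManager wired up as in Example 3 below (timbuctoo imports omitted); the URIs, file name, and media type are illustrative, not taken from the project.

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import java.util.concurrent.Future;
import javax.ws.rs.core.MediaType;

public class AddLogExample {
    // importManager is assumed to be constructed as in Example 3 below
    public ImportStatus addExampleLog(ImportManager importManager) throws Exception {
        // a one-quad N-Quads payload, purely for illustration
        InputStream rdf = new ByteArrayInputStream(
            "<http://example.org/s> <http://example.org/p> <http://example.org/o> .\n"
                .getBytes(StandardCharsets.UTF_8));
        Future<ImportStatus> pending = importManager.addLog(
            "http://example.org/base",           // baseUri
            "http://example.org/graph",          // defaultGraph
            "example.nq",                        // fileName handed to the log storage
            rdf,
            Optional.of(StandardCharsets.UTF_8), // charset of the stream
            MediaType.valueOf("application/n-quads"));
        // the Future completes once processLogsUntil has handled the new entry
        return pending.get();
    }
}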

Example 2 with LogList

Use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList in project timbuctoo by HuygensING.

From class ImportManager, method processLogsUntil:

private synchronized ImportStatus processLogsUntil(int maxIndex) {
    importStatus.start(this.getClass().getSimpleName() + ".processLogs", null);
    ListIterator<LogEntry> unprocessed = logListStore.getData().getUnprocessed();
    boolean dataWasAdded = false;
    while (unprocessed.hasNext() && unprocessed.nextIndex() <= maxIndex) {
        int index = unprocessed.nextIndex();
        LogEntry entry = unprocessed.next();
        importStatus.startEntry(entry);
        if (entry.getLogToken().isPresent()) {
            // the entry already has a stored log: parse it and feed it to processors that lag behind
            String logToken = entry.getLogToken().get();
            try (CachedLog log = logStorage.getLog(logToken)) {
                final Stopwatch stopwatch = Stopwatch.createStarted();
                for (RdfProcessor processor : subscribedProcessors) {
                    if (processor.getCurrentVersion() <= index) {
                        String msg = "******* " + processor.getClass().getSimpleName() + " Started importing full log...";
                        LOG.info(msg);
                        importStatus.setStatus(msg);
                        RdfParser rdfParser = serializerFactory.makeRdfParser(log);
                        processor.start(index);
                        rdfParser.importRdf(log, entry.getBaseUri(), entry.getDefaultGraph(), processor);
                        processor.commit();
                    }
                }
                long elapsedTime = stopwatch.elapsed(TimeUnit.SECONDS);
                String msg = "Finished importing. Total import took " + elapsedTime + " seconds.";
                LOG.info(msg);
                importStatus.setStatus(msg);
                dataWasAdded = true;
            } catch (Exception e) {
                LOG.error("Processing log failed", e);
                importStatus.addError("Processing log failed", e);
            }
            // Update the log, even after RdfProcessingFailedException | IOException
            try {
                logListStore.updateData(logList -> {
                    logList.markAsProcessed(index);
                    return logList;
                });
            } catch (IOException e) {
                LOG.error("Updating the log failed", e);
                importStatus.addError("Updating log failed", e);
            }
        } else {
            // no stored log yet: generate one with the entry's RdfCreator, save it, then revisit this entry
            RdfCreator creator = entry.getRdfCreator().get();
            String token = "";
            MediaType mediaType;
            Optional<Charset> charset;
            File tempFile = null;
            try {
                tempFile = File.createTempFile("log_to_generate", "nq");
                try (OutputStream stream = new GZIPOutputStream(new FileOutputStream(tempFile))) {
                    if (creator instanceof PlainRdfCreator) {
                        try (RdfSerializer serializer = serializerFactory.makeRdfSerializer(stream)) {
                            mediaType = serializer.getMediaType();
                            charset = Optional.of(serializer.getCharset());
                            ((PlainRdfCreator) creator).sendQuads(serializer, dataSet, importStatus::setStatus);
                        } catch (Exception e) {
                            LOG.error("Log generation failed", e);
                            importStatus.addError("Log generation failed", e);
                            break;
                        }
                    } else {
                        try (RdfPatchSerializer srlzr = serializerFactory.makeRdfPatchSerializer(stream, entry.getBaseUri())) {
                            mediaType = srlzr.getMediaType();
                            charset = Optional.of(srlzr.getCharset());
                            ((PatchRdfCreator) creator).sendQuads(srlzr, importStatus::setStatus, dataSet);
                        } catch (Exception e) {
                            LOG.error("Log generation failed", e);
                            importStatus.addError("Log generation failed", e);
                            break;
                        }
                    }
                }
                try (InputStream inputStream = new GZIPInputStream(new FileInputStream(tempFile))) {
                    token = logStorage.saveLog(inputStream, "log_generated_by_" + creator.getClass().getSimpleName(), mediaType, charset);
                }
                // replace the current entry with one that references the generated log
                LogEntry entryWithLog = LogEntry.addLogToEntry(entry, token);
                unprocessed.set(entryWithLog);
                // reset: the catch block below uses token to tell whether the log made it into the list
                token = "";
                // move back so this entry is processed again, now through the logToken branch
                unprocessed.previous();
            } catch (Exception e) {
                if (token.isEmpty()) {
                    LOG.error("Log processing failed", e);
                } else {
                    LOG.error("Log processing failed. Log created but not added to the list!", e);
                }
                importStatus.addError("Log processing failed", e);
                break;
            } finally {
                if (tempFile != null) {
                    tempFile.delete();
                }
            }
        }
        // end of the no-logToken branch
        importStatus.finishEntry();
    }
    // end main while loop
    if (dataWasAdded) {
        webhooks.run();
    }
    importStatus.finishList();
    // update log.json
    try {
        logListStore.updateData(Function.identity());
    } catch (IOException e) {
        LOG.error("Updating the log failed", e);
        importStatus.addError("Updating log failed", e);
    }
    return importStatus;
}
Also used: OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) Stopwatch(com.google.common.base.Stopwatch) GZIPInputStream(java.util.zip.GZIPInputStream) MediaType(javax.ws.rs.core.MediaType) CachedLog(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedLog) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) LogStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.LogStorageFailedException) FileStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.FileStorageFailedException) RdfSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer) RdfPatchSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfPatchSerializer) RdfCreator(nl.knaw.huygens.timbuctoo.v5.dataset.dto.RdfCreator) CachedFile(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile) File(java.io.File) RdfParser(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfParser)
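
The subtle move in the no-logToken branch above is the ListIterator dance: set replaces the element that next just returned with a copy carrying the freshly saved token, and previous steps the cursor back so the same position is visited again, this time through the logToken branch. A self-contained sketch of that idiom, with strings standing in for LogEntry objects:

import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;

public class RevisitDemo {
    public static void main(String[] args) {
        List<String> entries = new ArrayList<>(List.of("no-token", "done"));
        ListIterator<String> it = entries.listIterator();
        while (it.hasNext()) {
            String entry = it.next();
            if (entry.equals("no-token")) {
                it.set("with-token"); // replace the element next() just returned
                it.previous();        // step back: the next iteration revisits this slot
            } else {
                System.out.println("processing " + entry);
            }
        }
        // prints: processing with-token, then: processing done
    }
}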

Example 3 with LogList

Use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList in project timbuctoo by HuygensING.

From class ImportManagerTest, method makeSimpleDataSet:

@Before
public void makeSimpleDataSet() throws IOException {
    // reserve a temp file path, then delete it so the log list is created from scratch
    logListLocation = File.createTempFile("logList", ".json");
    logListLocation.delete();
    filesDir = Files.createTempDir();
    fileStorage = new FileSystemFileStorage(filesDir);
    this.importManager = new ImportManager(JsonFileBackedData.getOrCreate(logListLocation, LogList::new, new TypeReference<LogList>() {
    }), fileStorage, fileStorage, fileStorage, Executors.newSingleThreadExecutor(), new Rdf4jIoFactory(), () -> {
    });
}
Also used: FileSystemFileStorage(nl.knaw.huygens.timbuctoo.v5.filestorage.implementations.filesystem.FileSystemFileStorage) LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) Rdf4jIoFactory(nl.knaw.huygens.timbuctoo.v5.rdfio.implementations.rdf4j.Rdf4jIoFactory) Before(org.junit.Before)
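
JsonFileBackedData is a timbuctoo class; judging by this call site, getOrCreate lazily creates the backing JSON file from a supplier and reads it back through a Jackson TypeReference. A minimal sketch of that file-backed pattern in plain Jackson (the class and method names here are illustrative, not the project's API):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;
import java.util.function.Supplier;

public final class FileBackedJson {
    private static final ObjectMapper MAPPER = new ObjectMapper();

    // load the stored value, or create a fresh one and persist it immediately
    public static <T> T getOrCreate(File file, Supplier<T> initial,
                                    TypeReference<T> type) throws IOException {
        if (file.exists()) {
            return MAPPER.readValue(file, type);
        }
        T value = initial.get();
        MAPPER.writeValue(file, value);
        return value;
    }
}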

Example 4 with LogList

Use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList in project timbuctoo by HuygensING.

From class ImportStatusTest, method jsonSerialization:

@Test
public void jsonSerialization() throws Exception {
    LogList logList = new LogList();
    LogEntry entry = LogEntry.create("baseUri", "defaultGraph", "token");
    logList.addEntry(entry);
    ImportStatus status = new ImportStatus(logList);
    status.start("method", "baseUri");
    assertThat(status.getStatus(), is("Started method"));
    status.addError("This error is recorded in logList", new RuntimeException("list"));
    assertThat(status.getStatus().contains("This error is recorded in logList"), is(true));
    assertThat(logList.getListErrors().get(0).contains("This error is recorded in logList"), is(true));
    status.startEntry(entry);
    assertThat(status.getStatus(), is("Adding entry with token token"));
    status.addError("This error is recorded in logEntry", new RuntimeException("entry"));
    assertThat(status.getStatus().contains("This error is recorded in logEntry"), is(true));
    assertThat(entry.getImportStatus().get().getErrors().get(0).contains("This error is recorded in logEntry"), is(true));
    status.finishEntry();
    status.finishList();
    String json = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT).writeValueAsString(status);
    // System.out.println(json);
    assertThat(json.contains("\"@type\" : \"ImportStatus\""), is(true));
}
Also used: LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
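
The last assertion expects an "@type" discriminator in the JSON. One common way to make Jackson emit such a field is @JsonTypeInfo on the serialized class; a small sketch (illustrative only — the source does not show how ImportStatus itself is annotated):

import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.fasterxml.jackson.databind.ObjectMapper;

public class TypeFieldDemo {
    @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@type")
    @JsonTypeName("ImportStatus")
    static class StatusLike {
        public String status = "Import finished";
    }

    public static void main(String[] args) throws Exception {
        String json = new ObjectMapper().writeValueAsString(new StatusLike());
        System.out.println(json); // {"@type":"ImportStatus","status":"Import finished"}
    }
}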

Example 5 with LogList

Use of nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList in project timbuctoo by HuygensING.

From class BdbRmlDataSourceStoreTest, method itWorks:

@Test
public void itWorks() throws Exception {
    BdbNonPersistentEnvironmentCreator dbCreator = new BdbNonPersistentEnvironmentCreator();
    DataSetMetaData dataSetMetadata = new BasicDataSetMetaData("userid", "datasetid", "http://timbuctoo.huygens.knaw.nl/v5/userid/datasetid", "http://example.org/prefix/", false, false);
    final RmlDataSourceStore rmlDataSourceStore = new BdbRmlDataSourceStore(dbCreator.getDatabase("userid", "datasetid", "rmlSource", true, TupleBinding.getPrimitiveBinding(String.class), TupleBinding.getPrimitiveBinding(String.class), new StringStringIsCleanHandler()), new ImportStatus(new LogList()));
    RdfSerializer rdfSerializer = new RmlDataSourceRdfSerializer(rmlDataSourceStore);
    RawUploadRdfSaver rawUploadRdfSaver = new RawUploadRdfSaver(dataSetMetadata, "fileName", APPLICATION_OCTET_STREAM_TYPE, rdfSerializer, "origFileName", Clock.systemUTC());
    final String inputCol1 = rawUploadRdfSaver.addCollection("collection1");
    ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
    importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
    importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
    rawUploadRdfSaver.addPropertyDescriptions(inputCol1, importPropertyDescriptions);
    rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
    rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));
    final String inputCol2 = rawUploadRdfSaver.addCollection("collection2");
    ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
    importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
    importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
    rawUploadRdfSaver.addPropertyDescriptions(inputCol2, importPropertyDescriptions1);
    rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
    rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));
    rdfSerializer.close();
    RdfDataSource rdfDataSource = new RdfDataSource(rmlDataSourceStore, inputCol1, new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler()));
    RdfDataSource rdfDataSource2 = new RdfDataSource(rmlDataSourceStore, inputCol2, new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler()));
    final List<String> collection1;
    final List<String> collection2;
    try (Stream<Row> stream = rdfDataSource.getRows(new ThrowingErrorHandler())) {
        collection1 = stream.map(x -> x.getRawValue("propName1") + ":" + x.getRawValue("propName2")).collect(toList());
    }
    try (Stream<Row> stream = rdfDataSource2.getRows(new ThrowingErrorHandler())) {
        collection2 = stream.map(x -> x.getRawValue("prop3") + ":" + x.getRawValue("prop4")).collect(toList());
    }
    assertThat(collection1, contains("value1:val2", "entVal1:entVal2"));
    assertThat(collection2, contains("value1:val2", "entVal1:entVal2"));
    dbCreator.close();
}
Also used: RawUploadRdfSaver(nl.knaw.huygens.timbuctoo.v5.bulkupload.RawUploadRdfSaver) ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) StringStringIsCleanHandler(nl.knaw.huygens.timbuctoo.v5.berkeleydb.isclean.StringStringIsCleanHandler) RmlDataSourceStore(nl.knaw.huygens.timbuctoo.v5.datastores.rmldatasource.RmlDataSourceStore) BdbNonPersistentEnvironmentCreator(nl.knaw.huygens.timbuctoo.v5.dropwizard.BdbNonPersistentEnvironmentCreator) RdfSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer) JexlRowFactory(nl.knaw.huygens.timbuctoo.rml.datasource.jexl.JexlRowFactory) LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) ImportStatus(nl.knaw.huygens.timbuctoo.v5.dataset.ImportStatus) HashMapBasedJoinHandler(nl.knaw.huygens.timbuctoo.rml.datasource.joinhandlers.HashMapBasedJoinHandler) BasicDataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData) ThrowingErrorHandler(nl.knaw.huygens.timbuctoo.rml.ThrowingErrorHandler) Row(nl.knaw.huygens.timbuctoo.rml.Row) DataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSetMetaData) RdfDataSource(nl.knaw.huygens.timbuctoo.v5.rml.RdfDataSource) Test(org.junit.Test)
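
Note that both getRows calls sit in try-with-resources blocks: java.util.stream.Stream is AutoCloseable, and a stream backed by an external resource (presumably a database cursor here) leaks unless it is closed. The same discipline with a plain JDK stream, as a minimal illustration:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;

public class StreamCloseDemo {
    public static void main(String[] args) throws IOException {
        Path file = Files.createTempFile("rows", ".txt");
        Files.write(file, List.of("value1:val2", "entVal1:entVal2"));
        List<String> rows;
        // the try-with-resources releases the underlying file handle
        try (Stream<String> lines = Files.lines(file)) {
            rows = lines.collect(toList());
        }
        System.out.println(rows); // [value1:val2, entVal1:entVal2]
    }
}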

Aggregations

LogList (nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList): 6
IOException (java.io.IOException): 3
ImportStatus (nl.knaw.huygens.timbuctoo.v5.dataset.ImportStatus): 3
LogEntry (nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry): 3
LogStorageFailedException (nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.LogStorageFailedException): 3
Test (org.junit.Test): 3
File (java.io.File): 2
DataSetMetaData (nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSetMetaData): 2
CachedFile (nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile): 2
RdfSerializer (nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer): 2
Before (org.junit.Before): 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
Stopwatch (com.google.common.base.Stopwatch): 1
FileInputStream (java.io.FileInputStream): 1
FileOutputStream (java.io.FileOutputStream): 1
InputStream (java.io.InputStream): 1
OutputStream (java.io.OutputStream): 1
Charset (java.nio.charset.Charset): 1
GZIPInputStream (java.util.zip.GZIPInputStream): 1
GZIPOutputStream (java.util.zip.GZIPOutputStream): 1