Search in sources :

Example 1 with RdfParser

use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfParser in project timbuctoo by HuygensING.

The method processLogsUntil of the class ImportManager.

/**
 * Walks the unprocessed log entries up to and including index {@code maxIndex} and handles each one.
 *
 * <p>An entry that already has a log token is parsed and fed to every subscribed processor whose
 * current version is not beyond the entry's index; afterwards the entry is marked as processed,
 * also when the import itself failed. An entry without a log token first gets a log generated from
 * its {@link RdfCreator}: the quads are serialized to a gzipped temp file, stored through
 * {@code logStorage}, and the entry in the iterator is replaced by a copy carrying the new token.
 * The iterator is then stepped back so the next loop iteration handles that same entry through the
 * log-token branch.
 *
 * <p>Any failure while generating or storing a log stops the loop. Once the loop ends, the webhooks
 * are run if at least one log-token entry completed without an exception, and the log list is
 * written back.
 *
 * <p>The method is {@code synchronized} on this instance.
 *
 * @param maxIndex highest entry index (inclusive) that may be processed in this call
 * @return the {@code importStatus} field, updated with progress messages and errors from this run
 */
private synchronized ImportStatus processLogsUntil(int maxIndex) {
    importStatus.start(this.getClass().getSimpleName() + ".processLogs", null);
    ListIterator<LogEntry> unprocessed = logListStore.getData().getUnprocessed();
    // Only flipped to true by the log-token branch; decides whether webhooks run at the end.
    boolean dataWasAdded = false;
    while (unprocessed.hasNext() && unprocessed.nextIndex() <= maxIndex) {
        // Capture the index before next() advances the cursor; it identifies this entry below.
        int index = unprocessed.nextIndex();
        LogEntry entry = unprocessed.next();
        importStatus.startEntry(entry);
        if (entry.getLogToken().isPresent()) {
            // logToken: a stored log exists for this entry, so import it.
            String logToken = entry.getLogToken().get();
            try (CachedLog log = logStorage.getLog(logToken)) {
                final Stopwatch stopwatch = Stopwatch.createStarted();
                for (RdfProcessor processor : subscribedProcessors) {
                    // Skip processors whose current version is already past this entry's index.
                    if (processor.getCurrentVersion() <= index) {
                        String msg = "******* " + processor.getClass().getSimpleName() + " Started importing full log...";
                        LOG.info(msg);
                        importStatus.setStatus(msg);
                        // A fresh parser is created per processor; each one reads the log in full.
                        RdfParser rdfParser = serializerFactory.makeRdfParser(log);
                        processor.start(index);
                        rdfParser.importRdf(log, entry.getBaseUri(), entry.getDefaultGraph(), processor);
                        processor.commit();
                    }
                }
                long elapsedTime = stopwatch.elapsed(TimeUnit.SECONDS);
                String msg = "Finished importing. Total import took " + elapsedTime + " seconds.";
                LOG.info(msg);
                importStatus.setStatus(msg);
                // NOTE(review): set whenever the loop above finishes without throwing, even if no
                // processor passed the version check — confirm that is intended.
                dataWasAdded = true;
            } catch (Exception e) {
                // Failure is recorded but does not stop the loop; the entry is still marked below.
                LOG.error("Processing log failed", e);
                importStatus.addError("Processing log failed", e);
            }
            // Update the log, even after RdfProcessingFailedException | IOException
            try {
                logListStore.updateData(logList -> {
                    logList.markAsProcessed(index);
                    return logList;
                });
            } catch (IOException e) {
                LOG.error("Updating the log failed", e);
                importStatus.addError("Updating log failed", e);
            }
        } else {
            // no logToken: generate the log from the entry's creator first.
            // NOTE(review): get() is called without a presence check — presumably every entry has
            // either a log token or a creator; verify against LogEntry.
            RdfCreator creator = entry.getRdfCreator().get();
            // Stays empty until saveLog succeeds; the outer catch uses it to pick its log message.
            String token = "";
            // Both are assigned inside the serializer try blocks below; the catch blocks there
            // leave the loop via break, so the code after them only runs with these values set.
            MediaType mediaType;
            Optional<Charset> charset;
            File tempFile = null;
            try {
                // NOTE(review): the suffix has no leading dot, so the temp file name ends in "nq"
                // rather than ".nq".
                tempFile = File.createTempFile("log_to_generate", "nq");
                try (OutputStream stream = new GZIPOutputStream(new FileOutputStream(tempFile))) {
                    if (creator instanceof PlainRdfCreator) {
                        try (RdfSerializer serializer = serializerFactory.makeRdfSerializer(stream)) {
                            mediaType = serializer.getMediaType();
                            charset = Optional.of(serializer.getCharset());
                            ((PlainRdfCreator) creator).sendQuads(serializer, dataSet, importStatus::setStatus);
                        } catch (Exception e) {
                            LOG.error("Log generation failed", e);
                            importStatus.addError("Log generation failed", e);
                            // Leaves the while loop; the finally block below still deletes the temp file.
                            break;
                        }
                    } else {
                        // Any creator that is not a PlainRdfCreator is cast to PatchRdfCreator.
                        try (RdfPatchSerializer srlzr = serializerFactory.makeRdfPatchSerializer(stream, entry.getBaseUri())) {
                            mediaType = srlzr.getMediaType();
                            charset = Optional.of(srlzr.getCharset());
                            ((PatchRdfCreator) creator).sendQuads(srlzr, importStatus::setStatus, dataSet);
                        } catch (Exception e) {
                            LOG.error("Log generation failed", e);
                            importStatus.addError("Log generation failed", e);
                            // Leaves the while loop; the finally block below still deletes the temp file.
                            break;
                        }
                    }
                }
                // The output stream is closed at this point; read the temp file back and store it.
                try (InputStream inputStream = new GZIPInputStream(new FileInputStream(tempFile))) {
                    token = logStorage.saveLog(inputStream, "log_generated_by_" + creator.getClass().getSimpleName(), mediaType, charset);
                }
                LogEntry entryWithLog;
                entryWithLog = LogEntry.addLogToEntry(entry, token);
                // Replaces the element last returned by next() with the copy that carries the token.
                unprocessed.set(entryWithLog);
                // Reset so that a later exception is not reported as "created but not added".
                token = "";
                // move back to process this item again
                unprocessed.previous();
            } catch (Exception e) {
                if (token.isEmpty()) {
                    LOG.error("Log processing failed", e);
                } else {
                    // saveLog returned a token but addLogToEntry or set(...) threw afterwards.
                    LOG.error("Log processing failed. Log created but not added to the list!", e);
                }
                importStatus.addError("Log processing failed", e);
                break;
            } finally {
                if (tempFile != null) {
                    // The boolean result of delete() is ignored.
                    tempFile.delete();
                }
            }
        }
        // end else with no condition
        // NOTE(review): not reached on any of the break paths above, so a started entry is left
        // without a matching finishEntry() in those cases — confirm ImportStatus tolerates this.
        importStatus.finishEntry();
    }
    // end main while loop
    if (dataWasAdded) {
        webhooks.run();
    }
    importStatus.finishList();
    // update log.json
    // The identity function leaves the data unchanged; the call is made only for updateData itself.
    try {
        logListStore.updateData(Function.identity());
    } catch (IOException e) {
        LOG.error("Updating the log failed", e);
        importStatus.addError("Updating log failed", e);
    }
    return importStatus;
}
Also used : OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) Stopwatch(com.google.common.base.Stopwatch) GZIPInputStream(java.util.zip.GZIPInputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) MediaType(javax.ws.rs.core.MediaType) CachedLog(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedLog) LogEntry(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry) GZIPInputStream(java.util.zip.GZIPInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) LogStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.LogStorageFailedException) IOException(java.io.IOException) FileStorageFailedException(nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.FileStorageFailedException) FileInputStream(java.io.FileInputStream) RdfSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer) RdfPatchSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfPatchSerializer) FileOutputStream(java.io.FileOutputStream) RdfCreator(nl.knaw.huygens.timbuctoo.v5.dataset.dto.RdfCreator) CachedFile(nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile) File(java.io.File) RdfParser(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfParser)

Example 2 with RdfParser

use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfParser in project timbuctoo by HuygensING.

The method importRdf of the class Rdf4jRdfParser.

/**
 * Parses the given cached log with an RDF4J parser and forwards the parsed statements to
 * {@code rdfProcessor} through a {@link TimRdfHandler}.
 *
 * <p>The parser format is looked up from the log's MIME type. Blank node identifiers from the
 * input are preserved.
 *
 * @param input the cached log to read; supplies the MIME type, the file name and the reader
 * @param baseUri base URI handed to the RDF4J parser
 * @param defaultGraph graph name handed to the {@link TimRdfHandler}
 * @param rdfProcessor receiver handed to the {@link TimRdfHandler}
 * @throws RdfProcessingFailedException if the MIME type maps to no parser format, if reading or
 *     parsing fails, or if the handler fails; a handler failure whose cause already is an
 *     {@code RdfProcessingFailedException} is rethrown as that cause
 */
@Override
public void importRdf(CachedLog input, String baseUri, String defaultGraph, RdfProcessor rdfProcessor) throws RdfProcessingFailedException {
    try {
        final String mimeType = input.getMimeType().toString();
        final RDFFormat rdfFormat = Rio.getParserFormatForMIMEType(mimeType)
            .orElseThrow(() -> new UnsupportedRDFormatException(input.getMimeType() + " is not a supported rdf type."));

        final RDFParser parser = Rio.createParser(rdfFormat);
        parser.setPreserveBNodeIDs(true);

        final TimRdfHandler handler = new TimRdfHandler(rdfProcessor, defaultGraph, input.getFile().getName());
        parser.setRDFHandler(handler);

        parser.parse(input.getReader(), baseUri);
    } catch (RDFHandlerException handlerFailure) {
        // Unwrap a processing failure that the handler tunnelled through the RDF4J exception.
        final Throwable cause = handlerFailure.getCause();
        if (cause instanceof RdfProcessingFailedException) {
            throw (RdfProcessingFailedException) cause;
        }
        throw new RdfProcessingFailedException(handlerFailure);
    } catch (IOException | RDFParseException | UnsupportedRDFormatException failure) {
        throw new RdfProcessingFailedException(failure);
    }
}
Also used : UnsupportedRDFormatException(org.eclipse.rdf4j.rio.UnsupportedRDFormatException) RDFHandlerException(org.eclipse.rdf4j.rio.RDFHandlerException) TimRdfHandler(nl.knaw.huygens.timbuctoo.v5.rdfio.implementations.rdf4j.parsers.TimRdfHandler) IOException(java.io.IOException) RDFParser(org.eclipse.rdf4j.rio.RDFParser) RdfProcessingFailedException(nl.knaw.huygens.timbuctoo.v5.dataset.exceptions.RdfProcessingFailedException) RDFFormat(org.eclipse.rdf4j.rio.RDFFormat) RDFParseException(org.eclipse.rdf4j.rio.RDFParseException)

Aggregations

IOException (java.io.IOException)2 Stopwatch (com.google.common.base.Stopwatch)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileOutputStream (java.io.FileOutputStream)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 Charset (java.nio.charset.Charset)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 MediaType (javax.ws.rs.core.MediaType)1 LogEntry (nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogEntry)1 RdfCreator (nl.knaw.huygens.timbuctoo.v5.dataset.dto.RdfCreator)1 RdfProcessingFailedException (nl.knaw.huygens.timbuctoo.v5.dataset.exceptions.RdfProcessingFailedException)1 CachedFile (nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedFile)1 CachedLog (nl.knaw.huygens.timbuctoo.v5.filestorage.dto.CachedLog)1 FileStorageFailedException (nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.FileStorageFailedException)1 LogStorageFailedException (nl.knaw.huygens.timbuctoo.v5.filestorage.exceptions.LogStorageFailedException)1 RdfParser (nl.knaw.huygens.timbuctoo.v5.rdfio.RdfParser)1 RdfPatchSerializer (nl.knaw.huygens.timbuctoo.v5.rdfio.RdfPatchSerializer)1