use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer in project timbuctoo by HuygensING.
the class TabularRdfCreator method sendQuads.
/**
 * Loads the uploaded file identified by {@code fileToken} and pushes its content, via a
 * {@code RawUploadRdfSaver}, to the given serializer.
 *
 * @param saver serializer handed to the {@code RawUploadRdfSaver} that receives the output
 * @param dataSet supplies the import manager (for the cached file) and the data set metadata
 * @param statusConsumer wrapped in a {@code ResultReporter} and passed to the importer
 * @throws LogStorageFailedException wrapping any exception raised while fetching or loading
 *     the file
 */
@Override
public void sendQuads(RdfSerializer saver, DataSet dataSet, Consumer<String> statusConsumer)
    throws LogStorageFailedException {
  try (CachedFile upload = dataSet.getImportManager().getFile(fileToken)) {
    final RawUploadRdfSaver quadWriter = new RawUploadRdfSaver(
        dataSet.getMetadata(),
        upload.getFile().getName(),
        upload.getMimeType(),
        saver,
        fileName,
        Clock.systemUTC()
    );
    loader.loadData(
        Lists.newArrayList(tuple(fileName, upload.getFile())),
        new Importer(new StateMachine<>(quadWriter), new ResultReporter(statusConsumer))
    );
  } catch (Exception cause) {
    // Every failure, including one raised while closing the cached file, is reported uniformly.
    throw new LogStorageFailedException(cause);
  }
}
use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer in project timbuctoo by HuygensING.
the class ImportManager method processLogsUntil.
/**
 * Works through the unprocessed entries of the log list, up to and including list index
 * {@code maxIndex}.
 *
 * <p>An entry that already has a log token is fetched from {@code logStorage} and replayed into
 * every subscribed {@code RdfProcessor} whose current version is not beyond the entry's index.
 * The entry is marked as processed afterwards, also when the replay failed.
 *
 * <p>An entry without a log token is first turned into a log: its {@code RdfCreator} writes to a
 * gzipped temp file, the file is saved to {@code logStorage}, the entry in the list is replaced by
 * the result of {@code LogEntry.addLogToEntry} and the iterator steps back so that the same
 * position is visited again on the next pass. Any failure on this path stops the loop.
 *
 * <p>Webhooks run once at the end if at least one log-token entry completed its replay block
 * without an exception.
 *
 * @param maxIndex highest list index (inclusive) that may be processed in this call
 * @return the {@code importStatus} field, updated with the progress and errors of this run
 */
private synchronized ImportStatus processLogsUntil(int maxIndex) {
importStatus.start(this.getClass().getSimpleName() + ".processLogs", null);
ListIterator<LogEntry> unprocessed = logListStore.getData().getUnprocessed();
boolean dataWasAdded = false;
// nextIndex() is the list position of the entry that next() is about to return.
while (unprocessed.hasNext() && unprocessed.nextIndex() <= maxIndex) {
int index = unprocessed.nextIndex();
LogEntry entry = unprocessed.next();
importStatus.startEntry(entry);
if (entry.getLogToken().isPresent()) {
// Entry already has a stored log: replay it into the processors.
String logToken = entry.getLogToken().get();
try (CachedLog log = logStorage.getLog(logToken)) {
final Stopwatch stopwatch = Stopwatch.createStarted();
for (RdfProcessor processor : subscribedProcessors) {
// Only processors whose current version is not beyond this index take the log.
if (processor.getCurrentVersion() <= index) {
String msg = "******* " + processor.getClass().getSimpleName() + " Started importing full log...";
LOG.info(msg);
importStatus.setStatus(msg);
RdfParser rdfParser = serializerFactory.makeRdfParser(log);
processor.start(index);
rdfParser.importRdf(log, entry.getBaseUri(), entry.getDefaultGraph(), processor);
processor.commit();
}
}
long elapsedTime = stopwatch.elapsed(TimeUnit.SECONDS);
String msg = "Finished importing. Total import took " + elapsedTime + " seconds.";
LOG.info(msg);
importStatus.setStatus(msg);
// Set whenever the replay block completes, also when no processor needed this log.
dataWasAdded = true;
} catch (Exception e) {
// A failed replay is recorded but does not stop the loop.
LOG.error("Processing log failed", e);
importStatus.addError("Processing log failed", e);
}
// Update the log, even after RdfProcessingFailedException | IOException
try {
logListStore.updateData(logList -> {
logList.markAsProcessed(index);
return logList;
});
} catch (IOException e) {
LOG.error("Updating the log failed", e);
importStatus.addError("Updating log failed", e);
}
} else {
// Entry has no stored log yet: let its RdfCreator generate one first.
// NOTE(review): unchecked Optional.get() - assumes every entry without a log token has an
// RdfCreator; confirm against LogEntry.
RdfCreator creator = entry.getRdfCreator().get();
// token is non-empty only between a successful saveLog and the reset further down; the outer
// catch uses that to choose its log message.
String token = "";
MediaType mediaType;
Optional<Charset> charset;
File tempFile = null;
try {
// NOTE(review): the suffix is passed without a leading dot, so the temp file name ends in
// "nq" rather than ".nq".
tempFile = File.createTempFile("log_to_generate", "nq");
try (OutputStream stream = new GZIPOutputStream(new FileOutputStream(tempFile))) {
if (creator instanceof PlainRdfCreator) {
try (RdfSerializer serializer = serializerFactory.makeRdfSerializer(stream)) {
mediaType = serializer.getMediaType();
charset = Optional.of(serializer.getCharset());
((PlainRdfCreator) creator).sendQuads(serializer, dataSet, importStatus::setStatus);
} catch (Exception e) {
LOG.error("Log generation failed", e);
importStatus.addError("Log generation failed", e);
// Leaves the while loop; the enclosing finally still deletes the temp file, but
// finishEntry() is not reached for this entry.
break;
}
} else {
// Anything that is not a PlainRdfCreator is cast to PatchRdfCreator.
try (RdfPatchSerializer srlzr = serializerFactory.makeRdfPatchSerializer(stream, entry.getBaseUri())) {
mediaType = srlzr.getMediaType();
charset = Optional.of(srlzr.getCharset());
((PatchRdfCreator) creator).sendQuads(srlzr, importStatus::setStatus, dataSet);
} catch (Exception e) {
LOG.error("Log generation failed", e);
importStatus.addError("Log generation failed", e);
// Same as above: abort the loop, temp file is cleaned up by the finally block.
break;
}
}
}
// The gzip output stream is closed at this point; read the file back and store it as a log.
try (InputStream inputStream = new GZIPInputStream(new FileInputStream(tempFile))) {
token = logStorage.saveLog(inputStream, "log_generated_by_" + creator.getClass().getSimpleName(), mediaType, charset);
}
LogEntry entryWithLog;
entryWithLog = LogEntry.addLogToEntry(entry, token);
// Replaces the element last returned by next() with the entry built from the new token.
unprocessed.set(entryWithLog);
token = "";
// move back to process this item again
unprocessed.previous();
} catch (Exception e) {
if (token.isEmpty()) {
LOG.error("Log processing failed", e);
} else {
// saveLog succeeded but the entry in the list was not replaced.
LOG.error("Log processing failed. Log created but not added to the list!", e);
}
importStatus.addError("Log processing failed", e);
break;
} finally {
if (tempFile != null) {
// The result of delete() is ignored.
tempFile.delete();
}
}
}
// end of the branch for entries without a log token
importStatus.finishEntry();
}
// end main while loop
if (dataWasAdded) {
webhooks.run();
}
importStatus.finishList();
// update log.json
// NOTE(review): identity update - presumably only there to persist the list as it now stands;
// confirm against the store implementation.
try {
logListStore.updateData(Function.identity());
} catch (IOException e) {
LOG.error("Updating the log failed", e);
importStatus.addError("Updating log failed", e);
}
return importStatus;
}
use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer in project timbuctoo by HuygensING.
the class BdbRmlDataSourceStoreTest method itWorks.
/**
 * Round-trip check: two collections written through a {@code RawUploadRdfSaver} into a
 * {@code BdbRmlDataSourceStore} must be readable again, per collection, through an
 * {@code RdfDataSource}.
 *
 * <p>The database environment is closed in a {@code finally} block so that it is released even
 * when setup or one of the assertions fails.
 *
 * @throws Exception if creating, filling, reading or closing the store fails
 */
@Test
public void itWorks() throws Exception {
  BdbNonPersistentEnvironmentCreator dbCreator = new BdbNonPersistentEnvironmentCreator();
  try {
    DataSetMetaData dataSetMetadata = new BasicDataSetMetaData(
        "userid",
        "datasetid",
        "http://timbuctoo.huygens.knaw.nl/v5/userid/datasetid",
        "http://example.org/prefix/",
        false,
        false
    );
    final RmlDataSourceStore rmlDataSourceStore = new BdbRmlDataSourceStore(
        dbCreator.getDatabase(
            "userid",
            "datasetid",
            "rmlSource",
            true,
            TupleBinding.getPrimitiveBinding(String.class),
            TupleBinding.getPrimitiveBinding(String.class),
            new StringStringIsCleanHandler()
        ),
        new ImportStatus(new LogList())
    );
    RdfSerializer rdfSerializer = new RmlDataSourceRdfSerializer(rmlDataSourceStore);
    RawUploadRdfSaver rawUploadRdfSaver = new RawUploadRdfSaver(
        dataSetMetadata,
        "fileName",
        APPLICATION_OCTET_STREAM_TYPE,
        rdfSerializer,
        "origFileName",
        Clock.systemUTC()
    );

    // First collection: two properties, two rows.
    final String inputCol1 = rawUploadRdfSaver.addCollection("collection1");
    ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
    importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
    importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
    rawUploadRdfSaver.addPropertyDescriptions(inputCol1, importPropertyDescriptions);
    rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
    rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));

    // Second collection: same values under different property names.
    final String inputCol2 = rawUploadRdfSaver.addCollection("collection2");
    ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
    importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
    importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
    rawUploadRdfSaver.addPropertyDescriptions(inputCol2, importPropertyDescriptions1);
    rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
    rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));

    // The serializer is closed before anything is read back.
    // NOTE(review): presumably close() finalizes the writes to the store - confirm.
    rdfSerializer.close();

    RdfDataSource rdfDataSource = new RdfDataSource(
        rmlDataSourceStore,
        inputCol1,
        new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler())
    );
    RdfDataSource rdfDataSource2 = new RdfDataSource(
        rmlDataSourceStore,
        inputCol2,
        new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler())
    );

    final List<String> collection1;
    final List<String> collection2;
    // Each row is flattened to "<first property>:<second property>" for comparison.
    try (Stream<Row> stream = rdfDataSource.getRows(new ThrowingErrorHandler())) {
      collection1 = stream.map(x -> x.getRawValue("propName1") + ":" + x.getRawValue("propName2")).collect(toList());
    }
    try (Stream<Row> stream = rdfDataSource2.getRows(new ThrowingErrorHandler())) {
      collection2 = stream.map(x -> x.getRawValue("prop3") + ":" + x.getRawValue("prop4")).collect(toList());
    }

    assertThat(collection1, contains("value1:val2", "entVal1:entVal2"));
    assertThat(collection2, contains("value1:val2", "entVal1:entVal2"));
  } finally {
    // Previously only reached on success; a failing assertion left the environment open.
    dbCreator.close();
  }
}
use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer in project timbuctoo by HuygensING.
the class RawUploadRdfSaverTest method usageTest.
/**
 * Feeds two collections, each with two property descriptions and two entities, through a
 * {@code RawUploadRdfSaver} and compares the text captured by {@code RdfToStringFaker} with the
 * complete expected output.
 *
 * <p>The expected text has one statement per line, written as subject, predicate, object and
 * graph name separated by single spaces; typed values appear as {@code value^^type}.
 *
 * @throws LogStorageFailedException if the saver fails to write to the serializer
 */
@Test
public void usageTest() throws LogStorageFailedException {
RdfToStringFaker rdfSerializer = new RdfToStringFaker();
RawUploadRdfSaver instance = instanceWithRdfSerializer(rdfSerializer, dataSetMetadata);
// First collection: properties propName1/propName2 and two rows.
final String collection1 = instance.addCollection("collection1");
ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
instance.addPropertyDescriptions(collection1, importPropertyDescriptions);
instance.addEntity(collection1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
instance.addEntity(collection1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));
// Second collection: properties prop3/prop4 and two rows.
final String collection2 = instance.addCollection("collection2");
ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
instance.addPropertyDescriptions(collection2, importPropertyDescriptions1);
instance.addEntity(collection2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
instance.addEntity(collection2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));
String generatedRdf = rdfSerializer.toString();
// Use assertEquals because the failing Hamcrest output is hard to compare
String graphName = dataSetMetadata.getBaseUri();
String fileUri = dataSetMetadata.getUriPrefix() + "rawData/fileName/";
String prop = fileUri + "props/";
String rowData = fileUri + "entities/";
// Expected output, in order: the file-level statements, then per collection its declaration, its
// property descriptions (including a tim_id description with id -1 that this test never declares
// itself) and its rows. Row numbers continue across collections (1-2, then 3-4). UUIDs in the
// actual output are masked as {UUID} before the comparison.
assertEquals(fileUri + " " + RDF_TYPE + " " + TIM_TABULAR_FILE + " " + graphName + "\n" + graphName + " " + PROV_DERIVED_FROM + " " + fileUri + " " + graphName + "\n" + fileUri + " " + TIM_MIMETYPE + " " + "application/octet-stream" + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + RDFS_LABEL + " " + FILE_NAME + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + PROV_ATTIME + " " + DATE + "^^" + XSD_DATETIMESTAMP + " " + graphName + "\n" + collection1 + " " + RDF_TYPE + " " + collection1 + "type " + graphName + "\n" + collection1 + " " + RDF_TYPE + " " + TIM_TABULAR_COLLECTION + " " + graphName + "\n" + collection1 + " " + RDFS_LABEL + " collection1" + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + TIM_HASCOLLECTION + " " + collection1 + " " + graphName + "\n" + fileUri + " " + TIMBUCTOO_NEXT + " " + collection1 + " " + graphName + "\n" + prop + "tim_id " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection1 + " " + TIM_HAS_PROPERTY + " " + prop + "tim_id " + graphName + "\n" + prop + "tim_id " + TIM_PROP_ID + " -1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "tim_id " + RDFS_LABEL + " tim_id" + "^^" + STRING + " " + graphName + "\n" + prop + "propName1 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection1 + " " + TIM_HAS_PROPERTY + " " + prop + "propName1 " + graphName + "\n" + prop + "propName1 " + TIM_PROP_ID + " 1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "propName1 " + RDFS_LABEL + " propName1" + "^^" + STRING + " " + graphName + "\n" + prop + "tim_id " + TIMBUCTOO_NEXT + " " + prop + "propName1 " + graphName + "\n" + prop + "propName2 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection1 + " " + TIM_HAS_PROPERTY + " " + prop + "propName2 " + graphName + "\n" + prop + "propName2 " + TIM_PROP_ID + " 2" + "^^" + INTEGER + " " + graphName + "\n" + prop + "propName2 " + RDFS_LABEL + " propName2" + "^^" + STRING + " " + graphName + "\n" + prop + "propName1 " + 
TIMBUCTOO_NEXT + " " + prop + "propName2 " + graphName + "\n" + rowData + "1 " + RDF_TYPE + " " + collection1 + " " + graphName + "\n" + collection1 + " " + TIM_HAS_ROW + " " + rowData + "1 " + graphName + "\n" + rowData + "1 " + prop + "propName1" + " value1" + "^^" + STRING + " " + graphName + "\n" + rowData + "1 " + prop + "propName2" + " val2" + "^^" + STRING + " " + graphName + "\n" + rowData + "1 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n" + rowData + "2 " + RDF_TYPE + " " + collection1 + " " + graphName + "\n" + collection1 + " " + TIM_HAS_ROW + " " + rowData + "2 " + graphName + "\n" + rowData + "2 " + prop + "propName1" + " entVal1" + "^^" + STRING + " " + graphName + "\n" + rowData + "2 " + prop + "propName2" + " entVal2" + "^^" + STRING + " " + graphName + "\n" + rowData + "2 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n" + collection2 + " " + RDF_TYPE + " " + collection2 + "type " + graphName + "\n" + collection2 + " " + RDF_TYPE + " " + TIM_TABULAR_COLLECTION + " " + graphName + "\n" + collection2 + " " + RDFS_LABEL + " collection2" + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + TIM_HASCOLLECTION + " " + collection2 + " " + graphName + "\n" + collection1 + " " + TIMBUCTOO_NEXT + " " + collection2 + " " + graphName + "\n" + prop + "tim_id " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection2 + " " + TIM_HAS_PROPERTY + " " + prop + "tim_id " + graphName + "\n" + prop + "tim_id " + TIM_PROP_ID + " -1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "tim_id " + RDFS_LABEL + " tim_id" + "^^" + STRING + " " + graphName + "\n" + prop + "prop3 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection2 + " " + TIM_HAS_PROPERTY + " " + prop + "prop3 " + graphName + "\n" + prop + "prop3 " + TIM_PROP_ID + " 1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "prop3 " + RDFS_LABEL + " prop3" + "^^" + STRING + " " + graphName + "\n" + prop + "tim_id " + 
TIMBUCTOO_NEXT + " " + prop + "prop3 " + graphName + "\n" + prop + "prop4 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection2 + " " + TIM_HAS_PROPERTY + " " + prop + "prop4 " + graphName + "\n" + prop + "prop4 " + TIM_PROP_ID + " 2" + "^^" + INTEGER + " " + graphName + "\n" + prop + "prop4 " + RDFS_LABEL + " prop4" + "^^" + STRING + " " + graphName + "\n" + prop + "prop3 " + TIMBUCTOO_NEXT + " " + prop + "prop4 " + graphName + "\n" + rowData + "3 " + RDF_TYPE + " " + collection2 + " " + graphName + "\n" + collection2 + " " + TIM_HAS_ROW + " " + rowData + "3 " + graphName + "\n" + rowData + "3 " + prop + "prop3" + " value1" + "^^" + STRING + " " + graphName + "\n" + rowData + "3 " + prop + "prop4" + " val2" + "^^" + STRING + " " + graphName + "\n" + rowData + "3 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n" + rowData + "4 " + RDF_TYPE + " " + collection2 + " " + graphName + "\n" + collection2 + " " + TIM_HAS_ROW + " " + rowData + "4 " + graphName + "\n" + rowData + "4 " + prop + "prop3" + " entVal1" + "^^" + STRING + " " + graphName + "\n" + rowData + "4 " + prop + "prop4" + " entVal2" + "^^" + STRING + " " + graphName + "\n" + rowData + "4 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n", generatedRdf.replaceAll("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "{UUID}"));
}
use of nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer in project timbuctoo by HuygensING.
the class RmlRdfCreator method sendQuads.
/**
 * Parses the JSON-LD held in {@code rdfData} into an RML mapping document, executes it against
 * the data sources of the given data set and forwards every resulting quad to the serializer.
 *
 * <p>The stream returned by the mapping document is closed when the method finishes, whether
 * it returns normally or throws, so anything the stream holds open is released.
 *
 * @param saver receives one {@code onQuad} call per generated quad, all in graph {@code baseUri}
 * @param dataSet supplies the {@code RdfDataSourceFactory} used while building the mapping
 * @param status wrapped in a {@code ReportingErrorHandler} that is passed to the mapping execution
 * @throws LogStorageFailedException if the JSON-LD cannot be read, if the mapping document
 *     reports errors, or if the serializer fails
 */
@Override
public void sendQuads(RdfSerializer saver, DataSet dataSet, Consumer<String> status) throws LogStorageFailedException {
  RdfDataSourceFactory dataSourceFactory = dataSet.getDataSource();
  final Model model = ModelFactory.createDefaultModel();
  try {
    model.read(new ByteArrayInputStream(rdfData.getBytes(StandardCharsets.UTF_8)), null, "JSON-LD");
  } catch (Exception e) {
    throw new LogStorageFailedException(e);
  }
  final RmlMappingDocument rmlMappingDocument = rmlBuilder.fromRdf(model, dataSourceFactory::apply);
  if (rmlMappingDocument.getErrors().size() > 0) {
    throw new LogStorageFailedException("failure: " + String.join("\nfailure: ", rmlMappingDocument.getErrors()) + "\n");
  }
  // FIXME: trigger onprefix for all rml prefixes
  // FIXME: store rml and retrieve it from tripleStore when mapping
  // try-with-resources: the stream was previously never closed.
  try (Stream<Quad> triples = rmlMappingDocument.execute(new ReportingErrorHandler(status))) {
    // An explicit iterator is used because onQuad is called from a method that declares a
    // checked exception, which a forEach lambda could not propagate.
    Iterator<Quad> iterator = triples.iterator();
    while (iterator.hasNext()) {
      Quad triple = iterator.next();
      saver.onQuad(
          triple.getSubject().getUri().get(),
          triple.getPredicate().getUri().get(),
          triple.getObject().getContent(),
          triple.getObject().getLiteralType().orElse(null),
          triple.getObject().getLiteralLanguage().orElse(null),
          baseUri
      );
    }
  }
}
Aggregations