Use of nl.knaw.huygens.timbuctoo.v5.datastores.implementations.bdb.BdbRmlDataSourceStore in the timbuctoo project by HuygensING.
From the class BdbRmlDataSourceStoreTest, method itWorks.
/**
 * Writes two raw-upload collections through a {@code RawUploadRdfSaver} into a
 * {@code BdbRmlDataSourceStore} and verifies that each collection can be read back, row by row,
 * through an {@code RdfDataSource}.
 *
 * <p>The environment creator is closed in a {@code finally} block so that a failing assertion or
 * an exception while importing/reading does not leave the database environment open.
 *
 * @throws Exception if creating the store, saving the upload, or reading the rows fails
 */
@Test
public void itWorks() throws Exception {
  BdbNonPersistentEnvironmentCreator dbCreator = new BdbNonPersistentEnvironmentCreator();
  try {
    DataSetMetaData dataSetMetadata = new BasicDataSetMetaData(
      "userid",
      "datasetid",
      "http://timbuctoo.huygens.knaw.nl/v5/userid/datasetid",
      "http://example.org/prefix/",
      false,
      false
    );
    final RmlDataSourceStore rmlDataSourceStore = new BdbRmlDataSourceStore(
      dbCreator.getDatabase(
        "userid",
        "datasetid",
        "rmlSource",
        true,
        TupleBinding.getPrimitiveBinding(String.class),
        TupleBinding.getPrimitiveBinding(String.class),
        new StringStringIsCleanHandler()
      ),
      new ImportStatus(new LogList())
    );
    RdfSerializer rdfSerializer = new RmlDataSourceRdfSerializer(rmlDataSourceStore);
    RawUploadRdfSaver rawUploadRdfSaver = new RawUploadRdfSaver(
      dataSetMetadata,
      "fileName",
      APPLICATION_OCTET_STREAM_TYPE,
      rdfSerializer,
      "origFileName",
      Clock.systemUTC()
    );

    // First collection: two properties, two entities.
    final String inputCol1 = rawUploadRdfSaver.addCollection("collection1");
    ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
    importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
    importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
    rawUploadRdfSaver.addPropertyDescriptions(inputCol1, importPropertyDescriptions);
    rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
    rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));

    // Second collection: same values under different property names.
    final String inputCol2 = rawUploadRdfSaver.addCollection("collection2");
    ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
    importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
    importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
    rawUploadRdfSaver.addPropertyDescriptions(inputCol2, importPropertyDescriptions1);
    rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
    rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));

    // The serializer is closed before any rows are read back from the store.
    rdfSerializer.close();

    RdfDataSource rdfDataSource = new RdfDataSource(
      rmlDataSourceStore,
      inputCol1,
      new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler())
    );
    RdfDataSource rdfDataSource2 = new RdfDataSource(
      rmlDataSourceStore,
      inputCol2,
      new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler())
    );

    final List<String> collection1;
    final List<String> collection2;
    try (Stream<Row> stream = rdfDataSource.getRows(new ThrowingErrorHandler())) {
      collection1 = stream
        .map(x -> x.getRawValue("propName1") + ":" + x.getRawValue("propName2"))
        .collect(toList());
    }
    try (Stream<Row> stream = rdfDataSource2.getRows(new ThrowingErrorHandler())) {
      collection2 = stream
        .map(x -> x.getRawValue("prop3") + ":" + x.getRawValue("prop4"))
        .collect(toList());
    }

    assertThat(collection1, contains("value1:val2", "entVal1:entVal2"));
    assertThat(collection2, contains("value1:val2", "entVal1:entVal2"));
  } finally {
    // Always release the environment, even when an assertion above fails.
    dbCreator.close();
  }
}
Use of nl.knaw.huygens.timbuctoo.v5.datastores.implementations.bdb.BdbRmlDataSourceStore in the timbuctoo project by HuygensING.
From the class DataSet, method dataSet.
/**
 * Builds the {@code DataSet} described by {@code metadata}: creates its {@code ImportManager},
 * opens the Bdb* stores ("rdfData", "typenames", "schema", "truePatch", "updatedPerPatch",
 * "rmlSource" and "versions"), subscribes a {@code StoreUpdater} to the import manager and then
 * brings the stores up to date with the import log.
 *
 * <p>If any store reports that it is not clean, all stores are emptied and the logs are
 * reprocessed; otherwise only the logs that have not yet been processed are run.
 *
 * @param metadata supplies the owner id, data set id and base URI of the data set
 * @param executorService handed to the {@code ImportManager}
 * @param rdfPrefix handed to the {@code BdbTypeNameStore}
 * @param dataStoreFactory creates the databases that back each store
 * @param onUpdated handed to the {@code ImportManager}
 * @param dataSetStorage supplies the description file, file storage, log list, log storage and
 *     RDF I/O used by the import manager
 * @return the fully wired data set
 * @throws IOException declared by one of the calls made here (not visible which from this method)
 * @throws DataStoreCreationException if one of the databases could not be created
 */
public static DataSet dataSet(DataSetMetaData metadata, ExecutorService executorService, String rdfPrefix,
                              BdbEnvironmentCreator dataStoreFactory, Runnable onUpdated,
                              DataSetStorage dataSetStorage) throws IOException, DataStoreCreationException {
  String userId = metadata.getOwnerId();
  String dataSetId = metadata.getDataSetId();
  File descriptionFile = dataSetStorage.getResourceSyncDescriptionFile();
  FileStorage fileStorage = dataSetStorage.getFileStorage();
  ImportManager importManager = new ImportManager(
    dataSetStorage.getLogList(),
    fileStorage,
    fileStorage,
    dataSetStorage.getLogStorage(),
    executorService,
    dataSetStorage.getRdfIo(),
    onUpdated
  );

  // Best effort: when the description saver cannot be constructed the failure is logged and the
  // data set is still created, just without that subscriber.
  try {
    importManager.subscribeToRdf(
      new RdfDescriptionSaver(descriptionFile, metadata.getBaseUri(), importManager.getImportStatus())
    );
  } catch (ParserConfigurationException | SAXException e) {
    LOG.error("Could not construct import manager of data set", e);
  }

  final TupleBinding<String> stringBinding = TupleBinding.getPrimitiveBinding(String.class);
  try {
    StringStringIsCleanHandler stringStringIsCleanHandler = new StringStringIsCleanHandler();
    BdbTripleStore quadStore = new BdbTripleStore(
      dataStoreFactory.getDatabase(
        userId, dataSetId, "rdfData", true, stringBinding, stringBinding, stringStringIsCleanHandler
      )
    );
    final BdbTypeNameStore typeNameStore = new BdbTypeNameStore(
      new BdbBackedData(
        dataStoreFactory.getDatabase(
          userId, dataSetId, "typenames", false, stringBinding, stringBinding, stringStringIsCleanHandler
        )
      ),
      rdfPrefix
    );
    final BdbSchemaStore schema = new BdbSchemaStore(
      new BdbBackedData(
        dataStoreFactory.getDatabase(
          userId, dataSetId, "schema", false, stringBinding, stringBinding, stringStringIsCleanHandler
        )
      ),
      importManager.getImportStatus()
    );
    final BdbTruePatchStore truePatchStore = new BdbTruePatchStore(
      dataStoreFactory.getDatabase(
        userId, dataSetId, "truePatch", true, stringBinding, stringBinding, stringStringIsCleanHandler
      )
    );
    final TupleBinding<Integer> integerBinding = TupleBinding.getPrimitiveBinding(Integer.class);
    // Integer-keyed database, so it needs its own is-clean marker entry.
    final UpdatedPerPatchStore updatedPerPatchStore = new UpdatedPerPatchStore(
      dataStoreFactory.getDatabase(
        userId, dataSetId, "updatedPerPatch", true, integerBinding, stringBinding,
        new IsCleanHandler<Integer, String>() {
          @Override
          public Integer getKey() {
            return Integer.MAX_VALUE;
          }

          @Override
          public String getValue() {
            return "isClean";
          }
        }
      )
    );
    final BdbRmlDataSourceStore rmlDataSourceStore = new BdbRmlDataSourceStore(
      dataStoreFactory.getDatabase(
        userId, dataSetId, "rmlSource", true, stringBinding, stringBinding, stringStringIsCleanHandler
      ),
      importManager.getImportStatus()
    );
    // Integer-valued database, so the is-clean marker entry is the mirror image of the one above.
    VersionStore versionStore = new VersionStore(
      dataStoreFactory.getDatabase(
        userId, dataSetId, "versions", false, stringBinding, integerBinding,
        new IsCleanHandler<String, Integer>() {
          @Override
          public String getKey() {
            return "isClean";
          }

          @Override
          public Integer getValue() {
            return Integer.MAX_VALUE;
          }
        }
      )
    );

    final StoreUpdater storeUpdater = new StoreUpdater(
      dataStoreFactory,
      quadStore,
      typeNameStore,
      truePatchStore,
      updatedPerPatchStore,
      Lists.newArrayList(schema, rmlDataSourceStore),
      versionStore,
      importManager.getImportStatus()
    );
    importManager.subscribeToRdf(storeUpdater);

    // schemaStore is set exactly once; a repeated call is redundant and would be rejected by a
    // strict builder.
    ImmutableDataSet dataSet = ImmutableDataSet.builder()
      .ownerId(userId)
      .dataSetName(dataSetId)
      .bdbEnvironmentCreator(dataStoreFactory)
      .metadata(metadata)
      .quadStore(quadStore)
      .typeNameStore(typeNameStore)
      .schemaStore(schema)
      .dataSource(new RdfDataSourceFactory(rmlDataSourceStore))
      .importManager(importManager)
      .dataSetStorage(dataSetStorage)
      .build();
    importManager.init(dataSet);

    if (!quadStore.isClean() || !typeNameStore.isClean() || !schema.isClean() || !truePatchStore.isClean() ||
      !updatedPerPatchStore.isClean() || !rmlDataSourceStore.isClean() || !versionStore.isClean()) {
      // At least one store is not clean: wipe every store and rebuild them from the logs.
      LOG.error("Data set '{}__{}' data is corrupted, starting to reimport.", userId, dataSetId);
      quadStore.empty();
      typeNameStore.empty();
      schema.empty();
      truePatchStore.empty();
      updatedPerPatchStore.empty();
      rmlDataSourceStore.empty();
      versionStore.empty();
      importManager.reprocessLogs();
    } else {
      // process unprocessed logs
      importManager.processLogs();
    }
    return dataSet;
  } catch (BdbDbCreationException e) {
    // NOTE(review): only the cause of the BdbDbCreationException is forwarded; the wrapper itself
    // (and its message) is dropped. Confirm this unwrapping is intended.
    throw new DataStoreCreationException(e.getCause());
  }
}
Aggregations