Search in sources:

Example 1 with ImportPropertyDescriptions

Use of nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions in project timbuctoo by HuygensING.

In the class BdbRmlDataSourceStoreTest, method itWorks.

/**
 * Writes two raw-upload collections (two properties and two entities each) through a
 * {@code RawUploadRdfSaver} into a {@code BdbRmlDataSourceStore}, then reads each collection
 * back through an {@code RdfDataSource} and checks that the rows come out with the values
 * that were put in, in insertion order.
 *
 * @throws Exception if setting up, filling, reading or closing the store fails
 */
@Test
public void itWorks() throws Exception {
    BdbNonPersistentEnvironmentCreator dbCreator = new BdbNonPersistentEnvironmentCreator();
    try {
        DataSetMetaData dataSetMetadata = new BasicDataSetMetaData("userid", "datasetid", "http://timbuctoo.huygens.knaw.nl/v5/userid/datasetid", "http://example.org/prefix/", false, false);
        final RmlDataSourceStore rmlDataSourceStore = new BdbRmlDataSourceStore(dbCreator.getDatabase("userid", "datasetid", "rmlSource", true, TupleBinding.getPrimitiveBinding(String.class), TupleBinding.getPrimitiveBinding(String.class), new StringStringIsCleanHandler()), new ImportStatus(new LogList()));
        RdfSerializer rdfSerializer = new RmlDataSourceRdfSerializer(rmlDataSourceStore);
        RawUploadRdfSaver rawUploadRdfSaver = new RawUploadRdfSaver(dataSetMetadata, "fileName", APPLICATION_OCTET_STREAM_TYPE, rdfSerializer, "origFileName", Clock.systemUTC());

        // First collection: propName1 / propName2
        final String inputCol1 = rawUploadRdfSaver.addCollection("collection1");
        ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
        importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
        importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
        rawUploadRdfSaver.addPropertyDescriptions(inputCol1, importPropertyDescriptions);
        rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
        rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));

        // Second collection: prop3 / prop4, carrying the same values as the first
        final String inputCol2 = rawUploadRdfSaver.addCollection("collection2");
        ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
        importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
        importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
        rawUploadRdfSaver.addPropertyDescriptions(inputCol2, importPropertyDescriptions1);
        rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
        rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));

        // The serializer is closed before any data source reads from the store.
        rdfSerializer.close();

        RdfDataSource rdfDataSource = new RdfDataSource(rmlDataSourceStore, inputCol1, new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler()));
        RdfDataSource rdfDataSource2 = new RdfDataSource(rmlDataSourceStore, inputCol2, new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler()));
        final List<String> collection1;
        final List<String> collection2;
        // Each row is flattened to "<first property>:<second property>" for a compact comparison.
        try (Stream<Row> stream = rdfDataSource.getRows(new ThrowingErrorHandler())) {
            collection1 = stream.map(x -> x.getRawValue("propName1") + ":" + x.getRawValue("propName2")).collect(toList());
        }
        try (Stream<Row> stream = rdfDataSource2.getRows(new ThrowingErrorHandler())) {
            collection2 = stream.map(x -> x.getRawValue("prop3") + ":" + x.getRawValue("prop4")).collect(toList());
        }
        assertThat(collection1, contains("value1:val2", "entVal1:entVal2"));
        assertThat(collection2, contains("value1:val2", "entVal1:entVal2"));
    } finally {
        // Always release the database environment, also when an assertion or a store call
        // above fails; previously close() was skipped on every failure path.
        dbCreator.close();
    }
}
Also used : RawUploadRdfSaver(nl.knaw.huygens.timbuctoo.v5.bulkupload.RawUploadRdfSaver) ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) StringStringIsCleanHandler(nl.knaw.huygens.timbuctoo.v5.berkeleydb.isclean.StringStringIsCleanHandler) RmlDataSourceStore(nl.knaw.huygens.timbuctoo.v5.datastores.rmldatasource.RmlDataSourceStore) BdbNonPersistentEnvironmentCreator(nl.knaw.huygens.timbuctoo.v5.dropwizard.BdbNonPersistentEnvironmentCreator) RdfSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer) JexlRowFactory(nl.knaw.huygens.timbuctoo.rml.datasource.jexl.JexlRowFactory) LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) ImportStatus(nl.knaw.huygens.timbuctoo.v5.dataset.ImportStatus) HashMapBasedJoinHandler(nl.knaw.huygens.timbuctoo.rml.datasource.joinhandlers.HashMapBasedJoinHandler) BasicDataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData) ThrowingErrorHandler(nl.knaw.huygens.timbuctoo.rml.ThrowingErrorHandler) Row(nl.knaw.huygens.timbuctoo.rml.Row) DataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSetMetaData) BasicDataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData) RdfDataSource(nl.knaw.huygens.timbuctoo.v5.rml.RdfDataSource) Test(org.junit.Test)

Example 2 with ImportPropertyDescriptions

Use of nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions in project timbuctoo by HuygensING.

In the class RawUploadRdfSaverTest, method usageTest.

/**
 * End-to-end usage check: adds two collections, each with two property descriptions and two
 * entities, to a saver that writes into a {@code RdfToStringFaker}, and compares the faker's
 * {@code toString()} output with the full expected statement list.
 *
 * @throws LogStorageFailedException declared by the saver calls used below
 */
@Test
public void usageTest() throws LogStorageFailedException {
    RdfToStringFaker rdfSerializer = new RdfToStringFaker();
    // NOTE(review): instanceWithRdfSerializer is defined outside this snippet; presumably it
    // builds the saver around the given serializer and metadata - confirm against the helper.
    RawUploadRdfSaver instance = instanceWithRdfSerializer(rdfSerializer, dataSetMetadata);
    // First collection: properties propName1 (id 1) and propName2 (id 2), two entities.
    final String collection1 = instance.addCollection("collection1");
    ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
    importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
    importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
    instance.addPropertyDescriptions(collection1, importPropertyDescriptions);
    instance.addEntity(collection1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
    instance.addEntity(collection1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));
    // Second collection: properties prop3 (id 1) and prop4 (id 2), two entities.
    final String collection2 = instance.addCollection("collection2");
    ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
    importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
    importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
    instance.addPropertyDescriptions(collection2, importPropertyDescriptions1);
    instance.addEntity(collection2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
    instance.addEntity(collection2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));
    String generatedRdf = rdfSerializer.toString();
    // Use assertEquals because the failing Hamcrest output is hard to compare
    String graphName = dataSetMetadata.getBaseUri();
    // URI building blocks shared by the expected statements below.
    String fileUri = dataSetMetadata.getUriPrefix() + "rawData/fileName/";
    String prop = fileUri + "props/";
    String rowData = fileUri + "entities/";
    // Expected output, one space-separated statement per "\n"-terminated line, each ending in
    // the graph name. Order: file-level statements first; then per collection its own
    // statements, its property descriptions (a "tim_id" property with id -1 comes first and
    // the properties are chained with TIMBUCTOO_NEXT), and finally its rows. Row numbers keep
    // counting across collections (1-2 for collection1, 3-4 for collection2).
    // UUID-shaped substrings in the generated text are replaced by the literal "{UUID}" before
    // comparing, so the expected text can use that fixed placeholder for each row's tim_id.
    assertEquals(fileUri + " " + RDF_TYPE + " " + TIM_TABULAR_FILE + " " + graphName + "\n" + graphName + " " + PROV_DERIVED_FROM + " " + fileUri + " " + graphName + "\n" + fileUri + " " + TIM_MIMETYPE + " " + "application/octet-stream" + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + RDFS_LABEL + " " + FILE_NAME + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + PROV_ATTIME + " " + DATE + "^^" + XSD_DATETIMESTAMP + " " + graphName + "\n" + collection1 + " " + RDF_TYPE + " " + collection1 + "type " + graphName + "\n" + collection1 + " " + RDF_TYPE + " " + TIM_TABULAR_COLLECTION + " " + graphName + "\n" + collection1 + " " + RDFS_LABEL + " collection1" + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + TIM_HASCOLLECTION + " " + collection1 + " " + graphName + "\n" + fileUri + " " + TIMBUCTOO_NEXT + " " + collection1 + " " + graphName + "\n" + prop + "tim_id " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection1 + " " + TIM_HAS_PROPERTY + " " + prop + "tim_id " + graphName + "\n" + prop + "tim_id " + TIM_PROP_ID + " -1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "tim_id " + RDFS_LABEL + " tim_id" + "^^" + STRING + " " + graphName + "\n" + prop + "propName1 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection1 + " " + TIM_HAS_PROPERTY + " " + prop + "propName1 " + graphName + "\n" + prop + "propName1 " + TIM_PROP_ID + " 1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "propName1 " + RDFS_LABEL + " propName1" + "^^" + STRING + " " + graphName + "\n" + prop + "tim_id " + TIMBUCTOO_NEXT + " " + prop + "propName1 " + graphName + "\n" + prop + "propName2 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection1 + " " + TIM_HAS_PROPERTY + " " + prop + "propName2 " + graphName + "\n" + prop + "propName2 " + TIM_PROP_ID + " 2" + "^^" + INTEGER + " " + graphName + "\n" + prop + "propName2 " + RDFS_LABEL + " propName2" + "^^" + STRING + " " + graphName + "\n" + prop + "propName1 " + 
TIMBUCTOO_NEXT + " " + prop + "propName2 " + graphName + "\n" + rowData + "1 " + RDF_TYPE + " " + collection1 + " " + graphName + "\n" + collection1 + " " + TIM_HAS_ROW + " " + rowData + "1 " + graphName + "\n" + rowData + "1 " + prop + "propName1" + " value1" + "^^" + STRING + " " + graphName + "\n" + rowData + "1 " + prop + "propName2" + " val2" + "^^" + STRING + " " + graphName + "\n" + rowData + "1 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n" + rowData + "2 " + RDF_TYPE + " " + collection1 + " " + graphName + "\n" + collection1 + " " + TIM_HAS_ROW + " " + rowData + "2 " + graphName + "\n" + rowData + "2 " + prop + "propName1" + " entVal1" + "^^" + STRING + " " + graphName + "\n" + rowData + "2 " + prop + "propName2" + " entVal2" + "^^" + STRING + " " + graphName + "\n" + rowData + "2 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n" + collection2 + " " + RDF_TYPE + " " + collection2 + "type " + graphName + "\n" + collection2 + " " + RDF_TYPE + " " + TIM_TABULAR_COLLECTION + " " + graphName + "\n" + collection2 + " " + RDFS_LABEL + " collection2" + "^^" + STRING + " " + graphName + "\n" + fileUri + " " + TIM_HASCOLLECTION + " " + collection2 + " " + graphName + "\n" + collection1 + " " + TIMBUCTOO_NEXT + " " + collection2 + " " + graphName + "\n" + prop + "tim_id " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection2 + " " + TIM_HAS_PROPERTY + " " + prop + "tim_id " + graphName + "\n" + prop + "tim_id " + TIM_PROP_ID + " -1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "tim_id " + RDFS_LABEL + " tim_id" + "^^" + STRING + " " + graphName + "\n" + prop + "prop3 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection2 + " " + TIM_HAS_PROPERTY + " " + prop + "prop3 " + graphName + "\n" + prop + "prop3 " + TIM_PROP_ID + " 1" + "^^" + INTEGER + " " + graphName + "\n" + prop + "prop3 " + RDFS_LABEL + " prop3" + "^^" + STRING + " " + graphName + "\n" + prop + "tim_id " + 
TIMBUCTOO_NEXT + " " + prop + "prop3 " + graphName + "\n" + prop + "prop4 " + RDF_TYPE + " " + TIM_PROP_DESC + " " + graphName + "\n" + collection2 + " " + TIM_HAS_PROPERTY + " " + prop + "prop4 " + graphName + "\n" + prop + "prop4 " + TIM_PROP_ID + " 2" + "^^" + INTEGER + " " + graphName + "\n" + prop + "prop4 " + RDFS_LABEL + " prop4" + "^^" + STRING + " " + graphName + "\n" + prop + "prop3 " + TIMBUCTOO_NEXT + " " + prop + "prop4 " + graphName + "\n" + rowData + "3 " + RDF_TYPE + " " + collection2 + " " + graphName + "\n" + collection2 + " " + TIM_HAS_ROW + " " + rowData + "3 " + graphName + "\n" + rowData + "3 " + prop + "prop3" + " value1" + "^^" + STRING + " " + graphName + "\n" + rowData + "3 " + prop + "prop4" + " val2" + "^^" + STRING + " " + graphName + "\n" + rowData + "3 " + prop + "tim_id" + " {UUID}" + "^^" + STRING + " " + graphName + "\n" + rowData + "4 " + RDF_TYPE + " " + collection2 + " " + graphName + "\n" + collection2 + " " + TIM_HAS_ROW + " " + rowData + "4 " + graphName + "\n" + rowData + "4 " + prop + "prop3" + " entVal1" + "^^" + STRING + " " + graphName + "\n" + rowData + "4 " + prop + "prop4" + " entVal2" + "^^" + STRING + " " + graphName + "\n", generatedRdf.replaceAll("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "{UUID}"));
}
Also used : RawUploadRdfSaver(nl.knaw.huygens.timbuctoo.v5.bulkupload.RawUploadRdfSaver) ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)

Example 3 with ImportPropertyDescriptions

Use of nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions in project timbuctoo by HuygensING.

In the class RawUploadRdfSaverTest, method addPropertyDescriptionCreatesAPropertyDescription.

/**
 * Registers a single property description ("propName", id 1) on a freshly added collection
 * and verifies the four serializer calls expected for it: its type, its id, its position
 * after the "tim_id" property, and its label.
 */
@Test
public void addPropertyDescriptionCreatesAPropertyDescription() throws Exception {
    ImportPropertyDescriptions descriptions = new ImportPropertyDescriptions();
    descriptions.getOrCreate(1).setPropertyName("propName");
    String collectionUri = instance.addCollection(COLLECTION);

    instance.addPropertyDescriptions(collectionUri, descriptions);

    // Every expected statement is written to the data set's base URI graph.
    String baseUri = dataSetMetadata.getBaseUri();
    // Matchers stay inline: Mockito needs them created inside the verified call itself.
    verify(rdfSerializer).onRelation(
        argThat(containsString("propName")), eq(RDF_TYPE), eq(TIM_PROP_DESC), eq(baseUri));
    verify(rdfSerializer).onValue(
        argThat(containsString("propName")), eq(TIM_PROP_ID), eq("1"), eq(INTEGER), eq(baseUri));
    verify(rdfSerializer).onRelation(
        argThat(containsString("tim_id")), eq(TIMBUCTOO_NEXT), argThat(containsString("propName")), eq(baseUri));
    verify(rdfSerializer).onValue(
        argThat(containsString("propName")), eq(RDFS_LABEL), eq("propName"), eq(STRING), eq(baseUri));
}
Also used : ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)

Example 4 with ImportPropertyDescriptions

Use of nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions in project timbuctoo by HuygensING.

In the class TinkerpopSaverTest, method addPropertyDescriptionsStoresTheOrderOfThePropertyDescriptions.

/**
 * Creates three property descriptions whose ids (6, 5, 7) are deliberately not ascending and
 * checks that the stored chain follows creation order: the collection's "hasFirstProperty"
 * edge leads to "first", and successive "hasNextProperty" edges lead to "second" and "third".
 */
@Test
public void addPropertyDescriptionsStoresTheOrderOfThePropertyDescriptions() {
    ImportPropertyDescriptions descriptions = new ImportPropertyDescriptions();
    descriptions.getOrCreate(6).setPropertyName("first");
    descriptions.getOrCreate(5).setPropertyName("second");
    descriptions.getOrCreate(7).setPropertyName("third");
    final TinkerpopSaver saver = new TinkerpopSaver(vres, graphWrapper, VRE_NAME, VRE_NAME, MAX_VERTICES_PER_TRANSACTION, VRE_NAME);

    saver.addPropertyDescriptions(rawCollection, descriptions);

    // Head of the chain hangs off the collection vertex.
    Iterator<Vertex> headLinks = rawCollection.vertices(Direction.OUT, "hasFirstProperty");
    assertThat(headLinks.hasNext(), is(true));
    Vertex firstVertex = headLinks.next();
    assertThat(firstVertex, likeVertex().withProperty("id", 6).withProperty("name", "first"));

    // Second link.
    Iterator<Vertex> linksAfterFirst = firstVertex.vertices(Direction.OUT, "hasNextProperty");
    assertThat(linksAfterFirst.hasNext(), is(true));
    Vertex secondVertex = linksAfterFirst.next();
    assertThat(secondVertex, is(likeVertex().withProperty("id", 5).withProperty("name", "second")));

    // Third link.
    Iterator<Vertex> linksAfterSecond = secondVertex.vertices(Direction.OUT, "hasNextProperty");
    assertThat(linksAfterSecond.hasNext(), is(true));
    Vertex thirdVertex = linksAfterSecond.next();
    assertThat(thirdVertex, is(likeVertex().withProperty("id", 7).withProperty("name", "third")));
}
Also used : ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) VertexMatcher.likeVertex(nl.knaw.huygens.timbuctoo.util.VertexMatcher.likeVertex) TinkerpopSaver(nl.knaw.huygens.timbuctoo.database.tinkerpop.TinkerpopSaver) Test(org.junit.Test)

Example 5 with ImportPropertyDescriptions

Use of nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions in project timbuctoo by HuygensING.

In the class RawUploadRdfSaverTest, method addPropertyDescriptionAddsAllThePropertyDescriptionsToTheCollection.

/**
 * Registers two property descriptions on a freshly added collection and verifies that the
 * serializer received one TIM_HAS_PROPERTY relation from the collection for each of them.
 */
@Test
public void addPropertyDescriptionAddsAllThePropertyDescriptionsToTheCollection() throws Exception {
    ImportPropertyDescriptions descriptions = new ImportPropertyDescriptions();
    descriptions.getOrCreate(1).setPropertyName("propName1");
    descriptions.getOrCreate(2).setPropertyName("propName2");
    String collectionUri = instance.addCollection(COLLECTION);

    instance.addPropertyDescriptions(collectionUri, descriptions);

    // Both relations are expected in the data set's base URI graph.
    String baseUri = dataSetMetadata.getBaseUri();
    // Matchers stay inline: Mockito needs them created inside the verified call itself.
    verify(rdfSerializer).onRelation(
        eq(collectionUri), eq(TIM_HAS_PROPERTY), argThat(containsString("propName1")), eq(baseUri));
    verify(rdfSerializer).onRelation(
        eq(collectionUri), eq(TIM_HAS_PROPERTY), argThat(containsString("propName2")), eq(baseUri));
}
Also used : ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)

Aggregations

ImportPropertyDescriptions (nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions)6 Test (org.junit.Test)6 Matchers.containsString (org.hamcrest.Matchers.containsString)3 TinkerpopSaver (nl.knaw.huygens.timbuctoo.database.tinkerpop.TinkerpopSaver)2 VertexMatcher.likeVertex (nl.knaw.huygens.timbuctoo.util.VertexMatcher.likeVertex)2 RawUploadRdfSaver (nl.knaw.huygens.timbuctoo.v5.bulkupload.RawUploadRdfSaver)2 Vertex (org.apache.tinkerpop.gremlin.structure.Vertex)2 Row (nl.knaw.huygens.timbuctoo.rml.Row)1 ThrowingErrorHandler (nl.knaw.huygens.timbuctoo.rml.ThrowingErrorHandler)1 JexlRowFactory (nl.knaw.huygens.timbuctoo.rml.datasource.jexl.JexlRowFactory)1 HashMapBasedJoinHandler (nl.knaw.huygens.timbuctoo.rml.datasource.joinhandlers.HashMapBasedJoinHandler)1 StringStringIsCleanHandler (nl.knaw.huygens.timbuctoo.v5.berkeleydb.isclean.StringStringIsCleanHandler)1 ImportStatus (nl.knaw.huygens.timbuctoo.v5.dataset.ImportStatus)1 BasicDataSetMetaData (nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData)1 DataSetMetaData (nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSetMetaData)1 LogList (nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList)1 RmlDataSourceStore (nl.knaw.huygens.timbuctoo.v5.datastores.rmldatasource.RmlDataSourceStore)1 BdbNonPersistentEnvironmentCreator (nl.knaw.huygens.timbuctoo.v5.dropwizard.BdbNonPersistentEnvironmentCreator)1 RdfSerializer (nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer)1 RdfDataSource (nl.knaw.huygens.timbuctoo.v5.rml.RdfDataSource)1