Search in sources:

Example 1 with Row

use of nl.knaw.huygens.timbuctoo.rml.Row in project timbuctoo by HuygensING.

In the class BdbRmlDataSourceStoreTest, method itWorks.

/**
 * Writes two collections ("collection1" and "collection2"), each with two
 * properties and two entities, through a {@code RawUploadRdfSaver} into a
 * {@code BdbRmlDataSourceStore}, then reads each collection back through an
 * {@code RdfDataSource} and checks the raw property values of every row.
 *
 * <p>The database creator is closed in a {@code finally} block so that it is
 * released even when an assertion fails or an earlier step throws.
 *
 * @throws Exception if any step of the store setup, upload or read-back fails
 */
@Test
public void itWorks() throws Exception {
    BdbNonPersistentEnvironmentCreator dbCreator = new BdbNonPersistentEnvironmentCreator();
    try {
        DataSetMetaData dataSetMetadata = new BasicDataSetMetaData("userid", "datasetid", "http://timbuctoo.huygens.knaw.nl/v5/userid/datasetid", "http://example.org/prefix/", false, false);
        final RmlDataSourceStore rmlDataSourceStore = new BdbRmlDataSourceStore(dbCreator.getDatabase("userid", "datasetid", "rmlSource", true, TupleBinding.getPrimitiveBinding(String.class), TupleBinding.getPrimitiveBinding(String.class), new StringStringIsCleanHandler()), new ImportStatus(new LogList()));
        RdfSerializer rdfSerializer = new RmlDataSourceRdfSerializer(rmlDataSourceStore);
        RawUploadRdfSaver rawUploadRdfSaver = new RawUploadRdfSaver(dataSetMetadata, "fileName", APPLICATION_OCTET_STREAM_TYPE, rdfSerializer, "origFileName", Clock.systemUTC());

        // First collection: two named properties, two entities.
        final String inputCol1 = rawUploadRdfSaver.addCollection("collection1");
        ImportPropertyDescriptions importPropertyDescriptions = new ImportPropertyDescriptions();
        importPropertyDescriptions.getOrCreate(1).setPropertyName("propName1");
        importPropertyDescriptions.getOrCreate(2).setPropertyName("propName2");
        rawUploadRdfSaver.addPropertyDescriptions(inputCol1, importPropertyDescriptions);
        rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "value1", "propName2", "val2"));
        rawUploadRdfSaver.addEntity(inputCol1, ImmutableMap.of("propName1", "entVal1", "propName2", "entVal2"));

        // Second collection: same values under different property names.
        final String inputCol2 = rawUploadRdfSaver.addCollection("collection2");
        ImportPropertyDescriptions importPropertyDescriptions1 = new ImportPropertyDescriptions();
        importPropertyDescriptions1.getOrCreate(1).setPropertyName("prop3");
        importPropertyDescriptions1.getOrCreate(2).setPropertyName("prop4");
        rawUploadRdfSaver.addPropertyDescriptions(inputCol2, importPropertyDescriptions1);
        rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "value1", "prop4", "val2"));
        rawUploadRdfSaver.addEntity(inputCol2, ImmutableMap.of("prop3", "entVal1", "prop4", "entVal2"));

        // The serializer is closed before the store is read back.
        rdfSerializer.close();

        RdfDataSource rdfDataSource = new RdfDataSource(rmlDataSourceStore, inputCol1, new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler()));
        RdfDataSource rdfDataSource2 = new RdfDataSource(rmlDataSourceStore, inputCol2, new JexlRowFactory(ImmutableMap.of(), new HashMapBasedJoinHandler()));
        final List<String> collection1;
        final List<String> collection2;
        // Row streams are opened in try-with-resources so they are closed after collecting.
        try (Stream<Row> stream = rdfDataSource.getRows(new ThrowingErrorHandler())) {
            collection1 = stream.map(x -> x.getRawValue("propName1") + ":" + x.getRawValue("propName2")).collect(toList());
        }
        try (Stream<Row> stream = rdfDataSource2.getRows(new ThrowingErrorHandler())) {
            collection2 = stream.map(x -> x.getRawValue("prop3") + ":" + x.getRawValue("prop4")).collect(toList());
        }
        assertThat(collection1, contains("value1:val2", "entVal1:entVal2"));
        assertThat(collection2, contains("value1:val2", "entVal1:entVal2"));
    } finally {
        // Always release the database environment, including on test failure.
        dbCreator.close();
    }
}
Also used : RawUploadRdfSaver(nl.knaw.huygens.timbuctoo.v5.bulkupload.RawUploadRdfSaver) ImportPropertyDescriptions(nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions) StringStringIsCleanHandler(nl.knaw.huygens.timbuctoo.v5.berkeleydb.isclean.StringStringIsCleanHandler) RmlDataSourceStore(nl.knaw.huygens.timbuctoo.v5.datastores.rmldatasource.RmlDataSourceStore) BdbNonPersistentEnvironmentCreator(nl.knaw.huygens.timbuctoo.v5.dropwizard.BdbNonPersistentEnvironmentCreator) RdfSerializer(nl.knaw.huygens.timbuctoo.v5.rdfio.RdfSerializer) JexlRowFactory(nl.knaw.huygens.timbuctoo.rml.datasource.jexl.JexlRowFactory) LogList(nl.knaw.huygens.timbuctoo.v5.dataset.dto.LogList) ImportStatus(nl.knaw.huygens.timbuctoo.v5.dataset.ImportStatus) HashMapBasedJoinHandler(nl.knaw.huygens.timbuctoo.rml.datasource.joinhandlers.HashMapBasedJoinHandler) BasicDataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData) ThrowingErrorHandler(nl.knaw.huygens.timbuctoo.rml.ThrowingErrorHandler) Row(nl.knaw.huygens.timbuctoo.rml.Row) DataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.DataSetMetaData) BasicDataSetMetaData(nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData) RdfDataSource(nl.knaw.huygens.timbuctoo.v5.rml.RdfDataSource) Test(org.junit.Test)

Example 2 with Row

use of nl.knaw.huygens.timbuctoo.rml.Row in project timbuctoo by HuygensING.

In the class RrTriplesMap, method getItems.

/**
 * Builds the stream of quads produced by this triples map from the rows of its data source.
 *
 * <p>For every row a subject is generated. When no subject can be generated the failure is
 * reported through {@code defaultErrorHandler.subjectGenerationFailed} and the row contributes
 * nothing. Otherwise each subscription's left-hand map is notified of the new subject, together
 * with the row's raw value for the subscription's right-hand key, and the row contributes the
 * values generated by all predicate-object maps.
 *
 * <p>An info message is logged once, when the first row passes through the stream. All of this
 * work happens as the returned stream is consumed.
 *
 * @param defaultErrorHandler handler passed to the data source and used to report rows for
 *     which no subject could be generated
 * @return the quads generated for all rows of the data source
 */
Stream<Quad> getItems(ErrorHandler defaultErrorHandler) {
    // One-element array: a mutable counter that the lambda below is allowed to capture.
    final int[] rowsSeen = new int[1];
    return dataSource.getRows(defaultErrorHandler)
        .peek(ignored -> {
            // Increment for every row; only the very first row triggers the log line.
            if (++rowsSeen[0] == 1) {
                LoggerFactory.getLogger(RrTriplesMap.class).info("collection '{}' has at least one item", uri);
            }
        })
        .flatMap(row -> {
            Optional<RdfUri> maybeSubject = subjectMap.generateValue(row);
            if (!maybeSubject.isPresent()) {
                defaultErrorHandler.subjectGenerationFailed(uri, row);
                return Stream.empty();
            }
            RdfUri subject = maybeSubject.get();
            // Notify subscribers of the new subject before generating this row's quads.
            for (Tuple<RrRefObjectMap, String> listener : subscriptions) {
                String joinKey = listener.getRight();
                listener.getLeft().onNewSubject(row.getRawValue(joinKey), subject);
            }
            return predicateObjectMaps.stream().flatMap(mapping -> mapping.generateValue(subject, row));
        });
}
Also used : LoggerFactory(org.slf4j.LoggerFactory) Tuple(nl.knaw.huygens.timbuctoo.util.Tuple) ErrorHandler(nl.knaw.huygens.timbuctoo.rml.ErrorHandler) Collectors(java.util.stream.Collectors) Row(nl.knaw.huygens.timbuctoo.rml.Row) ArrayList(java.util.ArrayList) List(java.util.List) Stream(java.util.stream.Stream) RdfUri(nl.knaw.huygens.timbuctoo.rml.dto.RdfUri) Quad(nl.knaw.huygens.timbuctoo.rml.dto.Quad) Optional(java.util.Optional) RrRefObjectMap(nl.knaw.huygens.timbuctoo.rml.rmldata.termmaps.RrRefObjectMap) DataSource(nl.knaw.huygens.timbuctoo.rml.DataSource) Quad(nl.knaw.huygens.timbuctoo.rml.dto.Quad) RdfUri(nl.knaw.huygens.timbuctoo.rml.dto.RdfUri) RrRefObjectMap(nl.knaw.huygens.timbuctoo.rml.rmldata.termmaps.RrRefObjectMap)

Aggregations

Row (nl.knaw.huygens.timbuctoo.rml.Row)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Optional (java.util.Optional)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 ImportPropertyDescriptions (nl.knaw.huygens.timbuctoo.bulkupload.parsingstatemachine.ImportPropertyDescriptions)1 DataSource (nl.knaw.huygens.timbuctoo.rml.DataSource)1 ErrorHandler (nl.knaw.huygens.timbuctoo.rml.ErrorHandler)1 ThrowingErrorHandler (nl.knaw.huygens.timbuctoo.rml.ThrowingErrorHandler)1 JexlRowFactory (nl.knaw.huygens.timbuctoo.rml.datasource.jexl.JexlRowFactory)1 HashMapBasedJoinHandler (nl.knaw.huygens.timbuctoo.rml.datasource.joinhandlers.HashMapBasedJoinHandler)1 Quad (nl.knaw.huygens.timbuctoo.rml.dto.Quad)1 RdfUri (nl.knaw.huygens.timbuctoo.rml.dto.RdfUri)1 RrRefObjectMap (nl.knaw.huygens.timbuctoo.rml.rmldata.termmaps.RrRefObjectMap)1 Tuple (nl.knaw.huygens.timbuctoo.util.Tuple)1 StringStringIsCleanHandler (nl.knaw.huygens.timbuctoo.v5.berkeleydb.isclean.StringStringIsCleanHandler)1 RawUploadRdfSaver (nl.knaw.huygens.timbuctoo.v5.bulkupload.RawUploadRdfSaver)1 ImportStatus (nl.knaw.huygens.timbuctoo.v5.dataset.ImportStatus)1 BasicDataSetMetaData (nl.knaw.huygens.timbuctoo.v5.dataset.dto.BasicDataSetMetaData)1