Search in sources :

Example 6 with InsertTriples

use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.

the class BatchIT method simpleJoinAdd.

/**
 * Exercises an "Add" join batch against a two-pattern PCJ query.
 *
 * The test registers the query with Fluo, inserts only statements built from a
 * {@code <urn:predicate_2>} template, checks the per-node counts, then submits a
 * {@link JoinBatchInformation} with {@code Task.Add} to the join node and checks
 * the per-node counts again.
 */
@Test
public void simpleJoinAdd() throws Exception {
    final String query = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";

    try (FluoClient client = new FluoClientImpl(getFluoConfiguration())) {
        // Statements derived from a predicate_2 template with no object set.
        // NOTE(review): getRyaStatements presumably expands the template into 5 statements - confirm.
        final RyaURI subject = new RyaURI("urn:subject_1");
        final RyaStatement rightTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> rightStatements = getRyaStatements(rightTemplate, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage storage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = storage.createPcj(query);

        // Tell the Fluo app to maintain the PCJ.
        final String queryId = new CreateFluoPcj()
                .withRyaIntegration(pcjId, storage, client, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();

        // Positions 2 and 4 of the node id list are used as the join node and the
        // right-hand statement pattern node respectively.
        final List<String> nodeIds = getNodeIdStrings(client, queryId);
        final String joinNodeId = nodeIds.get(2);
        final String rightPatternId = nodeIds.get(4);

        // Binding set that the batch task joins against the scanned span.
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", vf.createURI("urn:subject_1"));
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet visibleLeftBindings = new VisibilityBindingSet(leftBindings);

        // Sharded scan prefix over the right statement pattern's rows for the shared subject.
        final URI subjectUri = vf.createURI("urn:subject_1");
        final Bytes scanPrefix = BindingHashShardingFunction.getShardedScanPrefix(rightPatternId, subjectUri);
        final Span scanSpan = Span.prefix(scanPrefix);

        // Stream the data into Fluo.
        final InsertTriples triplesInserter = new InsertTriples();
        triplesInserter.insert(client, rightStatements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(0, 0, 0, 0, 5));

        final JoinBatchInformation addBatch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(scanSpan)
                .setTask(Task.Add)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(visibleLeftBindings)
                .build();

        // Submit the batch, let the observers run, then verify the resulting counts.
        createSpanBatch(client, joinNodeId, addBatch);
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(5, 5, 5, 0, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 7 with InsertTriples

use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.

the class BatchIT method simpleJoinDelete.

/**
 * Exercises a "Delete" join batch against a two-pattern PCJ query.
 *
 * The test registers the query with Fluo, inserts statements built from both a
 * {@code <urn:predicate_1>} and a {@code <urn:predicate_2>} template, checks the
 * per-node counts, then submits a {@link JoinBatchInformation} with
 * {@code Task.Delete} to the join node and checks the per-node counts again.
 */
@Test
public void simpleJoinDelete() throws Exception {
    final String query = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";

    try (FluoClient client = new FluoClientImpl(getFluoConfiguration())) {
        // Statement templates for both patterns; neither has an object set.
        // NOTE(review): getRyaStatements presumably expands each template into 5 statements - confirm.
        final RyaURI subject = new RyaURI("urn:subject_1");
        final RyaStatement leftTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_1"), null);
        final RyaStatement rightTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> leftStatements = getRyaStatements(leftTemplate, 5);
        final Set<RyaStatement> rightStatements = getRyaStatements(rightTemplate, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage storage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = storage.createPcj(query);

        // Tell the Fluo app to maintain the PCJ.
        final String queryId = new CreateFluoPcj()
                .withRyaIntegration(pcjId, storage, client, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();

        // Positions 2 and 4 of the node id list are used as the join node and the
        // right-hand statement pattern node respectively.
        final List<String> nodeIds = getNodeIdStrings(client, queryId);
        final String joinNodeId = nodeIds.get(2);
        final String rightPatternId = nodeIds.get(4);

        // Binding set that the batch task joins against the scanned span.
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", vf.createURI("urn:subject_1"));
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet visibleLeftBindings = new VisibilityBindingSet(leftBindings);

        // Create the sharded span used for the deletion.
        final URI subjectUri = vf.createURI("urn:subject_1");
        final Bytes scanPrefix = BindingHashShardingFunction.getShardedScanPrefix(rightPatternId, subjectUri);
        final Span scanSpan = Span.prefix(scanPrefix);

        // Stream the data into Fluo.
        final InsertTriples triplesInserter = new InsertTriples();
        triplesInserter.insert(client, leftStatements, Optional.absent());
        triplesInserter.insert(client, rightStatements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(25, 25, 25, 5, 5));

        final JoinBatchInformation deleteBatch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(scanSpan)
                .setTask(Task.Delete)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(visibleLeftBindings)
                .build();

        // Submit the batch, let the observers run, then verify the resulting counts.
        createSpanBatch(client, joinNodeId, deleteBatch);
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(25, 25, 20, 5, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 8 with InsertTriples

use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.

the class PeriodicNotificationBinPrunerIT method periodicPrunerTest.

/**
 * Verifies that {@link PeriodicQueryPrunerExecutor} removes the results of a
 * periodic query for every bin that is handed to it.
 *
 * The test registers a periodic count query, ingests observation statements
 * timestamped 30, 60, 90 and 120 minutes before "now", checks the stored
 * results for four consecutive bins, then queues those bins for pruning and
 * checks that the result storage is empty for each of them and that
 * {@code compareFluoCounts} passes for each bin.
 *
 * The Fluo client is closed and the pruner is stopped even when an assertion
 * fails, so a failing run does not leak either of them.
 */
@Test
public void periodicPrunerTest() throws Exception {
    String sparql =
            "prefix function: <http://org.apache.rya/function#> "
            + "prefix time: <http://www.w3.org/2006/time#> "
            + "select ?id (count(?obs) as ?total) where {"
            + "Filter(function:periodic(?time, 2, .5, time:hours)) "
            + "?obs <uri:hasTime> ?time. "
            + "?obs <uri:hasId> ?id } group by ?id";

    try (FluoClient fluo = new FluoClientImpl(super.getFluoConfiguration())) {
        // initialize resources and create pcj
        PeriodicQueryResultStorage periodicStorage = new AccumuloPeriodicQueryResultStorage(super.getAccumuloConnector(), getRyaInstanceName());
        CreatePeriodicQuery createPeriodicQuery = new CreatePeriodicQuery(fluo, periodicStorage);
        String queryId = FluoQueryUtils.convertFluoQueryIdToPcjId(createPeriodicQuery.createPeriodicQuery(sparql).getQueryId());

        // create statements to ingest into Fluo
        final ValueFactory vf = new ValueFactoryImpl();
        final DatatypeFactory dtf = DatatypeFactory.newInstance();
        ZonedDateTime time = ZonedDateTime.now();
        long currentTime = time.toInstant().toEpochMilli();
        ZonedDateTime zTime1 = time.minusMinutes(30);
        String time1 = zTime1.format(DateTimeFormatter.ISO_INSTANT);
        ZonedDateTime zTime2 = zTime1.minusMinutes(30);
        String time2 = zTime2.format(DateTimeFormatter.ISO_INSTANT);
        ZonedDateTime zTime3 = zTime2.minusMinutes(30);
        String time3 = zTime3.format(DateTimeFormatter.ISO_INSTANT);
        ZonedDateTime zTime4 = zTime3.minusMinutes(30);
        String time4 = zTime4.format(DateTimeFormatter.ISO_INSTANT);

        final Collection<Statement> statements = Sets.newHashSet(
                vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time1))),
                vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasId"), vf.createLiteral("id_1")),
                vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time2))),
                vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasId"), vf.createLiteral("id_2")),
                vf.createStatement(vf.createURI("urn:obs_3"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time3))),
                vf.createStatement(vf.createURI("urn:obs_3"), vf.createURI("uri:hasId"), vf.createLiteral("id_3")),
                vf.createStatement(vf.createURI("urn:obs_4"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time4))),
                vf.createStatement(vf.createURI("urn:obs_4"), vf.createURI("uri:hasId"), vf.createLiteral("id_4")),
                vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time4))),
                vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasId"), vf.createLiteral("id_1")),
                vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time3))),
                vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasId"), vf.createLiteral("id_2")));

        // add statements to Fluo
        InsertTriples inserter = new InsertTriples();
        statements.forEach(x -> inserter.insert(fluo, RdfToRyaConversions.convertStatement(x)));
        super.getMiniFluo().waitForObservers();

        // Bin ids are multiples of the 30 minute period (in milliseconds), starting
        // at the period boundary at or before the captured "now".
        long period = 1800000;
        long binId = (currentTime / period) * period;
        long bin1 = binId;
        long bin2 = binId + period;
        long bin3 = binId + 2 * period;
        long bin4 = binId + 3 * period;

        // Create the expected results of the SPARQL query once the PCJ has been
        // computed, one set per bin.
        final Set<BindingSet> expected1 = new HashSet<>();
        expected1.add(expectedPrunerRow(vf, "2", "id_1", bin1));
        expected1.add(expectedPrunerRow(vf, "2", "id_2", bin1));
        expected1.add(expectedPrunerRow(vf, "1", "id_3", bin1));
        expected1.add(expectedPrunerRow(vf, "1", "id_4", bin1));

        final Set<BindingSet> expected2 = new HashSet<>();
        expected2.add(expectedPrunerRow(vf, "1", "id_1", bin2));
        expected2.add(expectedPrunerRow(vf, "2", "id_2", bin2));
        expected2.add(expectedPrunerRow(vf, "1", "id_3", bin2));

        final Set<BindingSet> expected3 = new HashSet<>();
        expected3.add(expectedPrunerRow(vf, "1", "id_1", bin3));
        expected3.add(expectedPrunerRow(vf, "1", "id_2", bin3));

        final Set<BindingSet> expected4 = new HashSet<>();
        expected4.add(expectedPrunerRow(vf, "1", "id_1", bin4));

        // make sure that expected and actual results align after ingest
        compareResults(periodicStorage, queryId, bin1, expected1);
        compareResults(periodicStorage, queryId, bin2, expected2);
        compareResults(periodicStorage, queryId, bin3, expected3);
        compareResults(periodicStorage, queryId, bin4, expected4);

        BlockingQueue<NodeBin> bins = new LinkedBlockingQueue<>();
        PeriodicQueryPrunerExecutor pruner = new PeriodicQueryPrunerExecutor(periodicStorage, fluo, 1, bins);
        pruner.start();
        try {
            bins.add(new NodeBin(queryId, bin1));
            bins.add(new NodeBin(queryId, bin2));
            bins.add(new NodeBin(queryId, bin3));
            bins.add(new NodeBin(queryId, bin4));

            // Give the pruner time to drain the queue before checking its effect.
            Thread.sleep(10000);

            compareResults(periodicStorage, queryId, bin1, new HashSet<>());
            compareResults(periodicStorage, queryId, bin2, new HashSet<>());
            compareResults(periodicStorage, queryId, bin3, new HashSet<>());
            compareResults(periodicStorage, queryId, bin4, new HashSet<>());
            compareFluoCounts(fluo, queryId, bin1);
            compareFluoCounts(fluo, queryId, bin2);
            compareFluoCounts(fluo, queryId, bin3);
            compareFluoCounts(fluo, queryId, bin4);
        } finally {
            // Stop the pruner even if one of the checks above fails.
            pruner.stop();
        }
    }
}

/**
 * Builds one expected result row of the periodic count query.
 *
 * @param vf value factory used to create the literals
 * @param total lexical form of the expected count, bound as an xsd:integer literal
 * @param id expected id, bound as an xsd:string literal
 * @param binId periodic bin id the row belongs to
 * @return a binding set with the "total", "id" and "periodicBinId" bindings
 */
private static BindingSet expectedPrunerRow(final ValueFactory vf, final String total, final String id, final long binId) {
    final MapBindingSet row = new MapBindingSet();
    row.addBinding("total", vf.createLiteral(total, XMLSchema.INTEGER));
    row.addBinding("id", vf.createLiteral(id, XMLSchema.STRING));
    row.addBinding("periodicBinId", vf.createLiteral(binId));
    return row;
}
Also used : MapBindingSet(org.openrdf.query.impl.MapBindingSet) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) BindingSet(org.openrdf.query.BindingSet) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) DatatypeFactory(javax.xml.datatype.DatatypeFactory) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) NodeBin(org.apache.rya.periodic.notification.api.NodeBin) Statement(org.openrdf.model.Statement) AccumuloPeriodicQueryResultStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPeriodicQueryResultStorage) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) ValueFactory(org.openrdf.model.ValueFactory) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) ZonedDateTime(java.time.ZonedDateTime) MapBindingSet(org.openrdf.query.impl.MapBindingSet) CreatePeriodicQuery(org.apache.rya.indexing.pcj.fluo.api.CreatePeriodicQuery) AccumuloPeriodicQueryResultStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPeriodicQueryResultStorage) PeriodicQueryResultStorage(org.apache.rya.indexing.pcj.storage.PeriodicQueryResultStorage) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 9 with InsertTriples

use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.

the class LoadTriplesCommand method execute.

/**
 * Loads the RDF statements of a triples file into the Fluo application.
 *
 * The file path is taken from the parsed command line parameters, the RDF
 * format is derived from the file name, and every parsed statement is handed
 * to a {@link FluoLoader} backed by {@link InsertTriples}.
 *
 * @param accumulo connector to Accumulo; must not be null
 * @param ryaTablePrefix Rya table prefix; not used by this command
 * @param rya Rya repository; not used by this command
 * @param fluo client of the Fluo application the statements are written to; must not be null
 * @param args command line arguments of this command; must not be null
 * @throws ArgumentsException if the command line arguments could not be parsed
 * @throws ExecutionException if the file could not be read, parsed or loaded
 */
@Override
public void execute(final Connector accumulo, final String ryaTablePrefix, final RyaSailRepository rya, final FluoClient fluo, final String[] args) throws ArgumentsException, ExecutionException {
    checkNotNull(accumulo);
    checkNotNull(fluo);
    checkNotNull(args);
    log.trace("Executing the Load Triples Command...");
    // Parse the command line arguments.
    final Parameters params = new Parameters();
    try {
        new JCommander(params, args);
    } catch (final ParameterException e) {
        throw new ArgumentsException("Could not load the Triples file because of invalid command line parameters.", e);
    }
    // Iterate over the Statements that are in the input file and write them to Fluo.
    log.trace("Loading RDF Statements from the Triples file '" + params.nTriplesFile + "'.");
    final Path triplesPath = Paths.get(params.nTriplesFile);
    try {
        final RDFParser parser = Rio.createParser(RDFFormat.forFileName(triplesPath.getFileName().toString()));
        final FluoLoader loader = new FluoLoader(fluo, new InsertTriples());
        parser.setRDFHandler(loader);
        // Open the file in try-with-resources so the stream is closed whether or not
        // parsing succeeds.
        try (final java.io.InputStream triplesStream = Files.newInputStream(triplesPath)) {
            parser.parse(triplesStream, triplesPath.toUri().toString());
        }
    } catch (final Exception e) {
        throw new ExecutionException("Could not load the RDF file into the Fluo app.", e);
    }
    log.trace("Finished executing the Load Triples Command.");
}
Also used : Path(java.nio.file.Path) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) JCommander(com.beust.jcommander.JCommander) FluoLoader(org.apache.rya.indexing.pcj.fluo.client.util.FluoLoader) ParameterException(com.beust.jcommander.ParameterException) RDFParser(org.openrdf.rio.RDFParser) ParameterException(com.beust.jcommander.ParameterException)

Example 10 with InsertTriples

use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.

the class PeriodicNotificationApplicationIT method addData.

/**
 * Inserts the given statements into Fluo and waits for the observers to finish.
 *
 * A Fluo client is opened for the duration of the call and closed afterwards.
 *
 * @param statements the statements to convert to Rya statements and insert
 * @throws DatatypeConfigurationException declared by the signature; nothing in this body throws it directly
 */
private void addData(final Collection<Statement> statements) throws DatatypeConfigurationException {
    try (FluoClient client = new FluoClientImpl(getFluoConfiguration())) {
        final InsertTriples triplesInserter = new InsertTriples();
        // Convert and insert the statements one at a time.
        for (final Statement statement : statements) {
            triplesInserter.insert(client, RdfToRyaConversions.convertStatement(statement));
        }
        getMiniFluo().waitForObservers();
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples)

Aggregations

InsertTriples (org.apache.rya.indexing.pcj.fluo.api.InsertTriples): 14, FluoClient (org.apache.fluo.api.client.FluoClient): 13, Test (org.junit.Test): 12, RyaStatement (org.apache.rya.api.domain.RyaStatement): 11, RyaURI (org.apache.rya.api.domain.RyaURI): 11, CreateFluoPcj (org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj): 11, PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage): 11, AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage): 11, FluoClientImpl (org.apache.fluo.core.client.FluoClientImpl): 8, BindingSet (org.openrdf.query.BindingSet): 6, MapBindingSet (org.openrdf.query.impl.MapBindingSet): 6, HashSet (java.util.HashSet): 5, Connector (org.apache.accumulo.core.client.Connector): 5, ValueFactory (org.openrdf.model.ValueFactory): 5, ValueFactoryImpl (org.openrdf.model.impl.ValueFactoryImpl): 5, Statement (org.openrdf.model.Statement): 3, QueryBindingSet (org.openrdf.query.algebra.evaluation.QueryBindingSet): 3, Bytes (org.apache.fluo.api.data.Bytes): 2, Span (org.apache.fluo.api.data.Span): 2, VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet): 2