Search in sources :

Example 11 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class BatchIT method simpleJoinDelete.

/**
 * Streams two sets of five statements that share a subject into Fluo for a
 * two-pattern join query, then submits a {@code Task.Delete} span batch
 * against the join node. Node counts are checked with {@code verifyCounts}
 * both before and after the batch is processed.
 *
 * @throws Exception if the PCJ cannot be created, the data cannot be
 *         inserted, or a resource fails to close.
 */
@Test
public void simpleJoinDelete() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
    // Both the Fluo client and the PCJ storage are AutoCloseable; manage them
    // together so neither leaks when an assertion fails.
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration());
            PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName())) {
        RyaURI subj = new RyaURI("urn:subject_1");
        RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
        RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
        Set<RyaStatement> statements1 = getRyaStatements(statement1, 5);
        Set<RyaStatement> statements2 = getRyaStatements(statement2, 5);
        // Create the PCJ table.
        final String pcjId = pcjStorage.createPcj(sparql);
        // Tell the Fluo app to maintain the PCJ.
        String queryId = new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
        List<String> ids = getNodeIdStrings(fluoClient, queryId);
        // NOTE(review): index 2 is taken to be the join node and index 4 the
        // right-hand statement pattern; the ordering comes from getNodeIdStrings.
        String joinId = ids.get(2);
        String rightSp = ids.get(4);
        // Binding set attached to the batch.
        QueryBindingSet bs = new QueryBindingSet();
        bs.addBinding("subject", vf.createURI("urn:subject_1"));
        bs.addBinding("object1", vf.createURI("urn:object_0"));
        VisibilityBindingSet vBs = new VisibilityBindingSet(bs);
        // Create the sharded span for deletion.
        URI uri = vf.createURI("urn:subject_1");
        Bytes prefixBytes = BindingHashShardingFunction.getShardedScanPrefix(rightSp, uri);
        Span span = Span.prefix(prefixBytes);
        // Stream the data into Fluo.
        InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, statements1, Optional.absent());
        inserter.insert(fluoClient, statements2, Optional.absent());
        getMiniFluo().waitForObservers();
        // Verify the counts before the delete batch is applied.
        verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 25, 5, 5));
        JoinBatchInformation batch = JoinBatchInformation.builder().setBatchSize(1).setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET).setSpan(span).setTask(Task.Delete).setJoinType(JoinType.NATURAL_JOIN).setSide(Side.LEFT).setBs(vBs).build();
        // Submit the delete batch against the join node and let the observers process it.
        createSpanBatch(fluoClient, joinId, batch);
        getMiniFluo().waitForObservers();
        // Verify the end results: the third expected count drops from 25 to 20.
        verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 20, 5, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 12 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class PeriodicNotificationProviderIT method testProvider.

/**
 * Registers a periodic query with Fluo, asks the
 * {@code PeriodicNotificationProvider} to process the registered
 * notifications, and checks the single notification taken from the queue:
 * initial delay 5000, period 15000, time unit milliseconds, and an id equal
 * to the PCJ id derived from the Fluo query id.
 */
@Test
public void testProvider() throws MalformedQueryException, InterruptedException, UnsupportedQueryException {
    final String periodicQuery = "prefix function: <http://org.apache.rya/function#> "
            + "prefix time: <http://www.w3.org/2006/time#> "
            + "select ?id (count(?obs) as ?total) where {"
            + "Filter(function:periodic(?time, 1, .25, time:minutes)) "
            + "?obs <uri:hasTime> ?time. "
            + "?obs <uri:hasId> ?id } group by ?id";

    // The coordinator is handed the queue that the assertions below read from.
    final BlockingQueue<TimestampedNotification> queue = new LinkedBlockingQueue<>();
    final PeriodicNotificationCoordinatorExecutor coordinator = new PeriodicNotificationCoordinatorExecutor(2, queue);
    final PeriodicNotificationProvider notificationProvider = new PeriodicNotificationProvider();
    final CreateFluoPcj pcjCreator = new CreateFluoPcj();

    String fluoQueryId = null;
    try (FluoClient fluo = new FluoClientImpl(getFluoConfiguration())) {
        // Register the query, then let the provider pick up what was registered.
        fluoQueryId = pcjCreator.createPcj(FluoQueryUtils.createNewPcjId(), periodicQuery, Sets.newHashSet(), fluo).getQueryId();
        notificationProvider.processRegisteredNotifications(coordinator, fluo.newSnapshot());
    }

    // Blocks until a notification is available on the queue.
    final TimestampedNotification received = queue.take();
    Assert.assertEquals(5000, received.getInitialDelay());
    Assert.assertEquals(15000, received.getPeriod());
    Assert.assertEquals(TimeUnit.MILLISECONDS, received.getTimeUnit());
    Assert.assertEquals(FluoQueryUtils.convertFluoQueryIdToPcjId(fluoQueryId), received.getId());
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) PeriodicNotificationCoordinatorExecutor(org.apache.rya.periodic.notification.coordinator.PeriodicNotificationCoordinatorExecutor) TimestampedNotification(org.apache.rya.periodic.notification.notification.TimestampedNotification) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) PeriodicNotificationProvider(org.apache.rya.periodic.notification.recovery.PeriodicNotificationProvider) Test(org.junit.Test)

Example 13 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class BatchIT method multiJoinBatchIntegrationTest.

/**
 * Streams three sets of ten statements into Fluo for a three-pattern query
 * whose PCJ is created with {@code new CreateFluoPcj(5, 5)}, then checks the
 * per-node counts with {@code verifyCounts}.
 *
 * @throws Exception if the PCJ cannot be created, the data cannot be
 *         inserted, or a resource fails to close.
 */
@Test
public void multiJoinBatchIntegrationTest() throws Exception {
    final String sparql = "SELECT ?subject1 ?subject2 ?object1 ?object2 WHERE { ?subject1 <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 ." + " ?subject2 <urn:predicate_3> ?object2 } ";
    // Both the Fluo client and the PCJ storage are AutoCloseable; manage them
    // together so neither leaks when an assertion fails.
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration());
            PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName())) {
        RyaURI subj1 = new RyaURI("urn:subject_1");
        RyaStatement statement1 = new RyaStatement(subj1, new RyaURI("urn:predicate_1"), null);
        RyaStatement statement2 = new RyaStatement(subj1, new RyaURI("urn:predicate_2"), null);
        Set<RyaStatement> statements1 = getRyaStatements(statement1, 10);
        Set<RyaStatement> statements2 = getRyaStatements(statement2, 10);
        RyaURI subj2 = new RyaURI("urn:subject_2");
        RyaStatement statement3 = new RyaStatement(subj2, new RyaURI("urn:predicate_3"), null);
        Set<RyaStatement> statements3 = getRyaStatements(statement3, 10);
        // Create the PCJ table.
        final String pcjId = pcjStorage.createPcj(sparql);
        // Tell the Fluo app to maintain the PCJ and sets batch scan size for StatementPatterns to 5 and
        // batch size of joins to 5.
        String queryId = new CreateFluoPcj(5, 5).withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
        List<String> ids = getNodeIdStrings(fluoClient, queryId);
        // Stream the data into Fluo.
        InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, statements1, Optional.absent());
        inserter.insert(fluoClient, statements2, Optional.absent());
        inserter.insert(fluoClient, statements3, Optional.absent());
        getMiniFluo().waitForObservers();
        // Verify the counts once all observers have finished.
        verifyCounts(fluoClient, ids, Arrays.asList(100, 100, 100, 100, 10, 10, 10));
    }
}
Also used : RyaURI(org.apache.rya.api.domain.RyaURI) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) Test(org.junit.Test)

Example 14 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class QueryIT method runTest.

/**
 * Registers {@code sparql} using the given export strategy, loads
 * {@code statements} via {@code addStatementsAndWait}, and asserts that the
 * stored results equal {@code expectedResults}.
 *
 * @param sparql the query to register; must not be null.
 * @param statements the statements to load; must not be null.
 * @param expectedResults the binding sets the storage is expected to hold; must not be null.
 * @param strategy selects the RYA or PERIODIC path; must not be null.
 * @throws RuntimeException if {@code strategy} is neither RYA nor PERIODIC.
 * @throws Exception if registering the query, loading data, or reading results fails.
 */
public void runTest(final String sparql, final Collection<Statement> statements, final Collection<BindingSet> expectedResults, final ExportStrategy strategy) throws Exception {
    requireNonNull(sparql);
    requireNonNull(statements);
    requireNonNull(expectedResults);
    // Fail fast like the other arguments instead of at the switch below.
    requireNonNull(strategy);
    // Register the PCJ with Rya.
    final Connector accumuloConn = super.getAccumuloConnector();
    final RyaClient ryaClient = AccumuloRyaClientFactory.build(createConnectionDetails(), accumuloConn);
    switch(strategy) {
        case RYA:
            ryaClient.getCreatePCJ().createPCJ(getRyaInstanceName(), sparql);
            addStatementsAndWait(statements);
            // Fetch the value that is stored within the PCJ table.
            try (final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName())) {
                final String pcjId = pcjStorage.listPcjs().get(0);
                final Set<BindingSet> pcjResults = Sets.newHashSet();
                // Close the result iterator once drained, as the PERIODIC branch does.
                try (CloseableIterator<BindingSet> pcjResultIter = pcjStorage.listResults(pcjId)) {
                    while (pcjResultIter.hasNext()) {
                        pcjResults.add(pcjResultIter.next());
                    }
                }
                // Ensure the result of the query matches the expected result.
                assertEquals(expectedResults, pcjResults);
            }
            break;
        case PERIODIC:
            final PeriodicQueryResultStorage periodicStorage = new AccumuloPeriodicQueryResultStorage(accumuloConn, getRyaInstanceName());
            final String periodicId = periodicStorage.createPeriodicQuery(sparql);
            // Register the periodic query with Fluo under the id created above.
            try (FluoClient fluo = new FluoClientImpl(super.getFluoConfiguration())) {
                new CreateFluoPcj().createPcj(periodicId, sparql, Sets.newHashSet(ExportStrategy.PERIODIC), fluo);
            }
            addStatementsAndWait(statements);
            final Set<BindingSet> results = Sets.newHashSet();
            try (CloseableIterator<BindingSet> resultIter = periodicStorage.listResults(periodicId, Optional.empty())) {
                while (resultIter.hasNext()) {
                    results.add(resultIter.next());
                }
            }
            assertEquals(expectedResults, results);
            break;
        default:
            throw new RuntimeException("Invalid export option");
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) MapBindingSet(org.openrdf.query.impl.MapBindingSet) BindingSet(org.openrdf.query.BindingSet) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) AccumuloPeriodicQueryResultStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPeriodicQueryResultStorage) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) RyaClient(org.apache.rya.api.client.RyaClient) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) AccumuloPeriodicQueryResultStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPeriodicQueryResultStorage) PeriodicQueryResultStorage(org.apache.rya.indexing.pcj.storage.PeriodicQueryResultStorage)

Example 15 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class RyaInputIncrementalUpdateIT method historicThenStreamedResults.

/**
 * Simulates the case where a Triple is added to Rya, a new query that
 * includes that triple as a historic match is inserted into Fluo, and then
 * some new triple that matches the query is streamed into Fluo. The query's
 * results must include both the historic result and the newly streamed
 * result.
 *
 * @throws Exception if loading data, creating the PCJ, or reading the
 *         results fails, or if a resource fails to close.
 */
@Test
public void historicThenStreamedResults() throws Exception {
    // A query that finds people who talk to Eve and work at Chipotle.
    final String sparql = "SELECT ?x " + "WHERE { " + "?x <http://talksTo> <http://Eve>. " + "?x <http://worksAt> <http://Chipotle>." + "}";
    // Triples that are loaded into Rya before the PCJ is created.
    final ValueFactory vf = new ValueFactoryImpl();
    final Set<Statement> historicTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
    // Triples that will be streamed into Fluo after the PCJ has been created.
    final Set<Statement> streamedTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
    // Load the historic data into Rya.
    final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
    for (final Statement triple : historicTriples) {
        ryaConn.add(triple);
    }
    // Create the PCJ table. The storage is AutoCloseable, so it is closed when the test ends.
    final Connector accumuloConn = super.getAccumuloConnector();
    try (PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName())) {
        final String pcjId = pcjStorage.createPcj(sparql);
        try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
            // Tell the Fluo app to maintain the PCJ.
            new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());
            super.getMiniFluo().waitForObservers();
            // Load the streaming data into Rya.
            for (final Statement triple : streamedTriples) {
                ryaConn.add(triple);
            }
            // Wait for the streamed triples to be processed.
            super.getMiniFluo().waitForObservers();
            // Expected matches: Alice (both triples historic), Frank (both triples
            // streamed) and Joe (worksAt historic, talksTo streamed).
            final Set<BindingSet> expected = new HashSet<>();
            MapBindingSet bs = new MapBindingSet();
            bs.addBinding("x", vf.createURI("http://Alice"));
            expected.add(bs);
            bs = new MapBindingSet();
            bs.addBinding("x", vf.createURI("http://Frank"));
            expected.add(bs);
            bs = new MapBindingSet();
            bs.addBinding("x", vf.createURI("http://Joe"));
            expected.add(bs);
            // Read everything stored for the PCJ and compare against the expected set.
            final Set<BindingSet> results = new HashSet<>();
            try (CloseableIterator<BindingSet> resultIt = pcjStorage.listResults(pcjId)) {
                while (resultIt.hasNext()) {
                    results.add(resultIt.next());
                }
            }
            assertEquals(expected, results);
        }
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) MapBindingSet(org.openrdf.query.impl.MapBindingSet) BindingSet(org.openrdf.query.BindingSet) FluoClient(org.apache.fluo.api.client.FluoClient) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) Statement(org.openrdf.model.Statement) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) ValueFactory(org.openrdf.model.ValueFactory) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

CreateFluoPcj (org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj)24 FluoClient (org.apache.fluo.api.client.FluoClient)22 PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage)20 AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage)20 Test (org.junit.Test)19 RyaStatement (org.apache.rya.api.domain.RyaStatement)14 BindingSet (org.openrdf.query.BindingSet)13 Connector (org.apache.accumulo.core.client.Connector)12 RyaURI (org.apache.rya.api.domain.RyaURI)11 InsertTriples (org.apache.rya.indexing.pcj.fluo.api.InsertTriples)11 MapBindingSet (org.openrdf.query.impl.MapBindingSet)11 HashSet (java.util.HashSet)10 FluoClientImpl (org.apache.fluo.core.client.FluoClientImpl)9 ValueFactory (org.openrdf.model.ValueFactory)9 ValueFactoryImpl (org.openrdf.model.impl.ValueFactoryImpl)9 Statement (org.openrdf.model.Statement)6 SailRepositoryConnection (org.openrdf.repository.sail.SailRepositoryConnection)6 Bytes (org.apache.fluo.api.data.Bytes)2 Span (org.apache.fluo.api.data.Span)2 AccumuloRyaDAO (org.apache.rya.accumulo.AccumuloRyaDAO)2