Search in sources :

Example 6 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class BatchIT method leftJoinBatchIntegrationTest.

@Test
public void leftJoinBatchIntegrationTest() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + "OPTIONAL{ ?subject <urn:predicate_2> ?object2} } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        RyaURI subj = new RyaURI("urn:subject_1");
        RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
        RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
        subj = new RyaURI("urn:subject_2");
        RyaStatement statement3 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
        Set<RyaStatement> statements1 = getRyaStatements(statement1, 10);
        Set<RyaStatement> statements2 = getRyaStatements(statement2, 10);
        Set<RyaStatement> statements3 = getRyaStatements(statement3, 10);
        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);
        // Tell the Fluo app to maintain the PCJ and sets batch scan size for StatementPatterns to 5 and
        // batch size of joins to 5.
        String queryId = new CreateFluoPcj(5, 5).withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
        List<String> ids = getNodeIdStrings(fluoClient, queryId);
        // Stream the data into Fluo.
        InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, statements1, Optional.absent());
        inserter.insert(fluoClient, statements2, Optional.absent());
        inserter.insert(fluoClient, statements3, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, ids, Arrays.asList(110, 110, 110, 20, 10));
    }
}
Also used : RyaURI(org.apache.rya.api.domain.RyaURI) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) Test(org.junit.Test)

Example 7 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class BatchIT method joinBatchIntegrationTest.

@Test
public void joinBatchIntegrationTest() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        RyaURI subj = new RyaURI("urn:subject_1");
        RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
        RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
        Set<RyaStatement> statements1 = getRyaStatements(statement1, 15);
        Set<RyaStatement> statements2 = getRyaStatements(statement2, 15);
        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);
        // Tell the Fluo app to maintain the PCJ and sets batch scan size for StatementPatterns to 5 and
        // batch size of joins to 5.
        String queryId = new CreateFluoPcj(5, 5).withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
        List<String> ids = getNodeIdStrings(fluoClient, queryId);
        // Stream the data into Fluo.
        InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, statements1, Optional.absent());
        inserter.insert(fluoClient, statements2, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, ids, Arrays.asList(225, 225, 225, 15, 15));
    }
}
Also used : RyaURI(org.apache.rya.api.domain.RyaURI) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) Test(org.junit.Test)

Example 8 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class BatchIT method simpleJoinAdd.

@Test
public void simpleJoinAdd() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        RyaURI subj = new RyaURI("urn:subject_1");
        RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
        Set<RyaStatement> statements2 = getRyaStatements(statement2, 5);
        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);
        // Tell the Fluo app to maintain the PCJ.
        String queryId = new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
        List<String> ids = getNodeIdStrings(fluoClient, queryId);
        String joinId = ids.get(2);
        String rightSp = ids.get(4);
        QueryBindingSet bs = new QueryBindingSet();
        bs.addBinding("subject", vf.createURI("urn:subject_1"));
        bs.addBinding("object1", vf.createURI("urn:object_0"));
        VisibilityBindingSet vBs = new VisibilityBindingSet(bs);
        URI uri = vf.createURI("urn:subject_1");
        Bytes prefixBytes = BindingHashShardingFunction.getShardedScanPrefix(rightSp, uri);
        Span span = Span.prefix(prefixBytes);
        // Stream the data into Fluo.
        InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, statements2, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, ids, Arrays.asList(0, 0, 0, 0, 5));
        JoinBatchInformation batch = JoinBatchInformation.builder().setBatchSize(1).setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET).setSpan(span).setTask(Task.Add).setJoinType(JoinType.NATURAL_JOIN).setSide(Side.LEFT).setBs(vBs).build();
        // Verify the end results of the query match the expected results.
        createSpanBatch(fluoClient, joinId, batch);
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, ids, Arrays.asList(5, 5, 5, 0, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 9 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class RyaInputIncrementalUpdateIT method historicAndStreamMultiVariables.

@Test
public void historicAndStreamMultiVariables() throws Exception {
    // A query that finds people who talk to other people and work at Chipotle.
    final String sparql = "SELECT ?x ?y " + "WHERE { " + "?x <http://talksTo> ?y. " + "?x <http://worksAt> <http://Chipotle>." + "}";
    // Triples that are loaded into Rya before the PCJ is created.
    final ValueFactory vf = new ValueFactoryImpl();
    final Set<Statement> historicTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
    // Triples that will be streamed into Fluo after the PCJ has been
    final Set<Statement> streamedTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://talksTo"), vf.createURI("http://Betty")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://talksTo"), vf.createURI("http://Alice")), vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
    // Load the historic data into Rya.
    final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
    for (final Statement triple : historicTriples) {
        ryaConn.add(triple);
    }
    // Create the PCJ table.
    final Connector accumuloConn = super.getAccumuloConnector();
    final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName());
    final String pcjId = pcjStorage.createPcj(sparql);
    try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
        // Tell the Fluo app to maintain the PCJ.
        new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());
        super.getMiniFluo().waitForObservers();
        // Load the streaming data into Rya.
        for (final Statement triple : streamedTriples) {
            ryaConn.add(triple);
        }
        // Ensure Alice is a match.
        super.getMiniFluo().waitForObservers();
        final Set<BindingSet> expected = new HashSet<>();
        MapBindingSet bs = new MapBindingSet();
        bs.addBinding("x", vf.createURI("http://Alice"));
        bs.addBinding("y", vf.createURI("http://Eve"));
        expected.add(bs);
        bs = new MapBindingSet();
        bs.addBinding("x", vf.createURI("http://Frank"));
        bs.addBinding("y", vf.createURI("http://Betty"));
        expected.add(bs);
        bs = new MapBindingSet();
        bs.addBinding("x", vf.createURI("http://Joe"));
        bs.addBinding("y", vf.createURI("http://Alice"));
        expected.add(bs);
        final Set<BindingSet> results = new HashSet<>();
        try (CloseableIterator<BindingSet> resultIt = pcjStorage.listResults(pcjId)) {
            while (resultIt.hasNext()) {
                results.add(resultIt.next());
            }
        }
        assertEquals(expected, results);
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) MapBindingSet(org.openrdf.query.impl.MapBindingSet) BindingSet(org.openrdf.query.BindingSet) FluoClient(org.apache.fluo.api.client.FluoClient) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) Statement(org.openrdf.model.Statement) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) ValueFactory(org.openrdf.model.ValueFactory) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with CreateFluoPcj

use of org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj in project incubator-rya by apache.

the class FluoAndHistoricPcjsDemo method execute.

@Override
public void execute(final MiniAccumuloCluster accumulo, final Connector accumuloConn, final String ryaTablePrefix, final RyaSailRepository ryaRepo, final RepositoryConnection ryaConn, final MiniFluo fluo, final FluoClient fluoClient) throws DemoExecutionException {
    log.setLevel(Level.INFO);
    // 1. Introduce some RDF Statements that we are going to start with and
    // pause so the presenter can introduce this information to the audience.
    final Set<RyaStatement> relevantHistoricStatements = Sets.newHashSet(new RyaStatement(eve, talksTo, charlie), new RyaStatement(david, talksTo, alice), new RyaStatement(alice, worksAt, coffeeShop), new RyaStatement(bob, worksAt, coffeeShop));
    log.info("We add some Statements that are relevant to the query we will compute:");
    prettyLogStatements(relevantHistoricStatements);
    waitForEnter();
    log.info("We also some more Satements that aren't realted to the query we will compute");
    final Set<RyaStatement> otherHistoricStatements = Sets.newHashSet(new RyaStatement(henry, worksAt, burgerShop), new RyaStatement(irene, worksAt, burgerShop), new RyaStatement(justin, worksAt, burgerShop), new RyaStatement(kristi, worksAt, burgerShop), new RyaStatement(luke, worksAt, burgerShop), new RyaStatement(manny, worksAt, cupcakeShop), new RyaStatement(nate, worksAt, cupcakeShop), new RyaStatement(olivia, worksAt, cupcakeShop), new RyaStatement(paul, worksAt, cupcakeShop), new RyaStatement(ross, worksAt, cupcakeShop), new RyaStatement(henry, talksTo, irene), new RyaStatement(henry, talksTo, justin), new RyaStatement(kristi, talksTo, irene), new RyaStatement(luke, talksTo, irene), new RyaStatement(sally, talksTo, paul), new RyaStatement(sally, talksTo, ross), new RyaStatement(sally, talksTo, kristi), new RyaStatement(tim, talksTo, nate), new RyaStatement(tim, talksTo, paul), new RyaStatement(tim, talksTo, kristi));
    log.info("Theese statements will also be inserted into the core Rya tables:");
    prettyLogStatements(otherHistoricStatements);
    waitForEnter();
    // 2. Load the statements into the core Rya tables.
    log.info("Loading the historic statements into Rya...");
    loadDataIntoRya(ryaConn, relevantHistoricStatements);
    loadDataIntoRya(ryaConn, otherHistoricStatements);
    log.info("");
    // 3. Introduce the query that we're going to load into Fluo and pause so that the
    // presenter may show what they believe the expected output should be.
    final String sparql = "SELECT ?patron ?employee " + "WHERE { " + "?patron <http://talksTo> ?employee. " + "?employee <http://worksAt> <http://CoffeeShop>. " + "}";
    log.info("The following SPARQL query will be loaded into the Fluo application for incremental updates:");
    prettyLogSparql(sparql);
    waitForEnter();
    // 4. Write the query to Fluo and import the historic matches. Wait for the app to finish exporting results.
    log.info("Telling Fluo to maintain the query and import the historic Statement Pattern matches.");
    final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, ryaTablePrefix);
    final String pcjId;
    try {
        // Create the PCJ Index in Rya.
        pcjId = pcjStorage.createPcj(sparql);
        // Tell the Fluo app to maintain it.
        new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, ryaTablePrefix);
    } catch (MalformedQueryException | PcjException | RyaDAOException | UnsupportedQueryException e) {
        throw new DemoExecutionException("Error while using Fluo to compute and export historic matches, so the demo can not continue. Exiting.", e);
    }
    log.info("Waiting for the fluo application to finish exporting the initial results...");
    fluo.waitForObservers();
    log.info("Historic result exporting finished.");
    log.info("");
    // 5. Show that the Fluo app exported the results to the PCJ table in Accumulo.
    log.info("The following Binding Sets were exported to the PCJ with ID '" + pcjId + "' in Rya:");
    try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
        while (resultsIt.hasNext()) {
            log.info("    " + resultsIt.next());
        }
    } catch (final Exception e) {
        throw new DemoExecutionException("Could not fetch the PCJ's reuslts from Accumulo. Exiting.", e);
    }
    waitForEnter();
    // 6. Introduce some new Statements that we will stream into the Fluo app.
    final RyaStatement newLeft = new RyaStatement(george, talksTo, frank);
    final RyaStatement newRight = new RyaStatement(frank, worksAt, coffeeShop);
    final RyaStatement joinLeft = new RyaStatement(eve, talksTo, bob);
    final RyaStatement joinRight = new RyaStatement(charlie, worksAt, coffeeShop);
    final Set<RyaStatement> relevantstreamedStatements = Sets.newHashSet(newLeft, newRight, joinLeft, joinRight);
    log.info("We stream these relevant Statements into Fluo and the core Rya tables:");
    log.info(prettyFormat(newLeft) + "          - Part of a new result");
    log.info(prettyFormat(newRight) + "      - Other part of a new result");
    log.info(prettyFormat(joinLeft) + "               - Joins with a historic <http://talksTo> statement");
    log.info(prettyFormat(joinRight) + "    - Joins with a historic <http://worksA>t statement");
    waitForEnter();
    final Set<RyaStatement> otherStreamedStatements = Sets.newHashSet(new RyaStatement(alice, talksTo, tim), new RyaStatement(bob, talksTo, tim), new RyaStatement(charlie, talksTo, tim), new RyaStatement(frank, talksTo, tim), new RyaStatement(david, talksTo, tim), new RyaStatement(eve, talksTo, sally), new RyaStatement(george, talksTo, sally), new RyaStatement(henry, talksTo, sally), new RyaStatement(irene, talksTo, sally), new RyaStatement(justin, talksTo, sally), new RyaStatement(kristi, talksTo, manny), new RyaStatement(luke, talksTo, manny), new RyaStatement(manny, talksTo, paul), new RyaStatement(nate, talksTo, manny), new RyaStatement(olivia, talksTo, manny), new RyaStatement(paul, talksTo, kristi), new RyaStatement(ross, talksTo, kristi), new RyaStatement(sally, talksTo, kristi), new RyaStatement(olivia, talksTo, kristi), new RyaStatement(olivia, talksTo, kristi));
    log.info("We also stream these irrelevant Statements into Fluo and the core Rya tables:");
    prettyLogStatements(otherStreamedStatements);
    waitForEnter();
    // 7. Insert the new triples into the core Rya tables and the Fluo app.
    loadDataIntoRya(ryaConn, relevantstreamedStatements);
    loadDataIntoFluo(fluoClient, relevantstreamedStatements);
    log.info("Waiting for the fluo application to finish exporting the newly streamed results...");
    fluo.waitForObservers();
    log.info("Streamed result exporting finished.");
    log.info("");
    // 8. Show the new results have been exported to the PCJ table in Accumulo.
    log.info("The following Binding Sets were exported to the PCJ with ID '" + pcjId + "' in Rya:");
    try (CloseableIterator<BindingSet> resultsIt = pcjStorage.listResults(pcjId)) {
        while (resultsIt.hasNext()) {
            log.info("    " + resultsIt.next());
        }
    } catch (final Exception e) {
        throw new DemoExecutionException("Could not fetch the PCJ's reuslts from Accumulo. Exiting.", e);
    }
    log.info("");
}
Also used : BindingSet(org.openrdf.query.BindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) UnsupportedQueryException(org.apache.rya.indexing.pcj.fluo.app.query.UnsupportedQueryException) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) UnsupportedQueryException(org.apache.rya.indexing.pcj.fluo.app.query.UnsupportedQueryException) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) RepositoryException(org.openrdf.repository.RepositoryException) MalformedQueryException(org.openrdf.query.MalformedQueryException) RyaDAOException(org.apache.rya.api.persist.RyaDAOException) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MalformedQueryException(org.openrdf.query.MalformedQueryException) RyaDAOException(org.apache.rya.api.persist.RyaDAOException)

Aggregations

CreateFluoPcj (org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj)24 FluoClient (org.apache.fluo.api.client.FluoClient)22 PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage)20 AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage)20 Test (org.junit.Test)19 RyaStatement (org.apache.rya.api.domain.RyaStatement)14 BindingSet (org.openrdf.query.BindingSet)13 Connector (org.apache.accumulo.core.client.Connector)12 RyaURI (org.apache.rya.api.domain.RyaURI)11 InsertTriples (org.apache.rya.indexing.pcj.fluo.api.InsertTriples)11 MapBindingSet (org.openrdf.query.impl.MapBindingSet)11 HashSet (java.util.HashSet)10 FluoClientImpl (org.apache.fluo.core.client.FluoClientImpl)9 ValueFactory (org.openrdf.model.ValueFactory)9 ValueFactoryImpl (org.openrdf.model.impl.ValueFactoryImpl)9 Statement (org.openrdf.model.Statement)6 SailRepositoryConnection (org.openrdf.repository.sail.SailRepositoryConnection)6 Bytes (org.apache.fluo.api.data.Bytes)2 Span (org.apache.fluo.api.data.Span)2 AccumuloRyaDAO (org.apache.rya.accumulo.AccumuloRyaDAO)2