Search in sources :

Example 21 with PrecomputedJoinStorage

use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.

the class BatchIT method leftJoinBatchIntegrationTest.

/**
 * Integration test for a left join (SPARQL OPTIONAL) maintained by Fluo with a
 * batch scan size of 5 and a join batch size of 5.
 *
 * Three groups of statements are built from template statements (the object
 * is left null and presumably filled in by getRyaStatements - confirm against
 * that helper): subject_1/predicate_1, subject_1/predicate_2 and
 * subject_2/predicate_1, 10 requested for each. After the observers finish,
 * the per-node result counts are compared against the expected values.
 *
 * @throws Exception if any step of the test fails
 */
@Test
public void leftJoinBatchIntegrationTest() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + "OPTIONAL{ ?subject <urn:predicate_2> ?object2} } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        // Template statements: two predicates for the first subject, one for the second.
        final RyaURI firstSubject = new RyaURI("urn:subject_1");
        final RyaStatement firstSubjectPred1 = new RyaStatement(firstSubject, new RyaURI("urn:predicate_1"), null);
        final RyaStatement firstSubjectPred2 = new RyaStatement(firstSubject, new RyaURI("urn:predicate_2"), null);
        final RyaURI secondSubject = new RyaURI("urn:subject_2");
        final RyaStatement secondSubjectPred1 = new RyaStatement(secondSubject, new RyaURI("urn:predicate_1"), null);

        final Set<RyaStatement> firstGroup = getRyaStatements(firstSubjectPred1, 10);
        final Set<RyaStatement> secondGroup = getRyaStatements(firstSubjectPred2, 10);
        final Set<RyaStatement> thirdGroup = getRyaStatements(secondSubjectPred1, 10);

        // Create the PCJ table that will hold the query results.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);

        // Register the query with the Fluo app; both constructor arguments
        // (StatementPattern batch scan size and join batch size) are 5.
        final CreateFluoPcj createPcj = new CreateFluoPcj(5, 5);
        final String queryId = createPcj
                .withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();
        final List<String> nodeIds = getNodeIdStrings(fluoClient, queryId);

        // Stream the three groups into Fluo, in the same order they were built.
        final InsertTriples inserter = new InsertTriples();
        for (final Set<RyaStatement> group : Arrays.asList(firstGroup, secondGroup, thirdGroup)) {
            inserter.insert(fluoClient, group, Optional.absent());
        }

        // Let the observers drain before checking the per-node counts.
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, nodeIds, Arrays.asList(110, 110, 110, 20, 10));
    }
}
Also used : RyaURI(org.apache.rya.api.domain.RyaURI) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) Test(org.junit.Test)

Example 22 with PrecomputedJoinStorage

use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.

the class BatchIT method joinBatchIntegrationTest.

/**
 * Integration test for a two-pattern join maintained by Fluo with a batch
 * scan size of 5 and a join batch size of 5.
 *
 * Two groups of statements are built for subject_1 (predicate_1 and
 * predicate_2, 15 requested for each from templates whose object is null),
 * streamed into Fluo, and the per-node result counts are then compared
 * against the expected values.
 *
 * @throws Exception if any step of the test fails
 */
@Test
public void joinBatchIntegrationTest() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        // Template statements that share a single subject.
        final RyaURI subject = new RyaURI("urn:subject_1");
        final RyaStatement pred1Template = new RyaStatement(subject, new RyaURI("urn:predicate_1"), null);
        final RyaStatement pred2Template = new RyaStatement(subject, new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> pred1Statements = getRyaStatements(pred1Template, 15);
        final Set<RyaStatement> pred2Statements = getRyaStatements(pred2Template, 15);

        // Create the PCJ table that will hold the query results.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);

        // Register the query with the Fluo app; both constructor arguments
        // (StatementPattern batch scan size and join batch size) are 5.
        final CreateFluoPcj createPcj = new CreateFluoPcj(5, 5);
        final String queryId = createPcj
                .withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();
        final List<String> nodeIds = getNodeIdStrings(fluoClient, queryId);

        // Stream both groups into Fluo, predicate_1 statements first.
        final InsertTriples inserter = new InsertTriples();
        for (final Set<RyaStatement> group : Arrays.asList(pred1Statements, pred2Statements)) {
            inserter.insert(fluoClient, group, Optional.absent());
        }

        // Let the observers drain before checking the per-node counts.
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, nodeIds, Arrays.asList(225, 225, 225, 15, 15));
    }
}
Also used : RyaURI(org.apache.rya.api.domain.RyaURI) FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) Test(org.junit.Test)

Example 23 with PrecomputedJoinStorage

use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.

the class BatchIT method simpleJoinAdd.

/**
 * Integration test that submits a single "Add" join batch by hand.
 *
 * Only predicate_2 statements are streamed into Fluo, so before the batch is
 * submitted just the last node in the id list is expected to hold results.
 * A JoinBatchInformation is then created for the node at index 2 (used here
 * as the join), carrying a left-side binding set and a span over the node at
 * index 4 (used here as the right statement pattern). Once the observers
 * finish, the counts are verified again.
 *
 * @throws Exception if any step of the test fails
 */
@Test
public void simpleJoinAdd() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        // Only statements for the second predicate are loaded by this test.
        final RyaStatement pred2Template = new RyaStatement(new RyaURI("urn:subject_1"), new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> pred2Statements = getRyaStatements(pred2Template, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);

        // Tell the Fluo app to maintain the PCJ.
        final CreateFluoPcj createPcj = new CreateFluoPcj();
        final String queryId = createPcj
                .withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();
        final List<String> nodeIds = getNodeIdStrings(fluoClient, queryId);
        final String joinId = nodeIds.get(2);
        final String rightSpId = nodeIds.get(4);

        // Stream the data into Fluo and check the counts before any join batch runs.
        final InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, pred2Statements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, nodeIds, Arrays.asList(0, 0, 0, 0, 5));

        // Binding set that the batch supplies for the left side of the join.
        final URI subjectUri = vf.createURI("urn:subject_1");
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", subjectUri);
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet leftVisibilityBindings = new VisibilityBindingSet(leftBindings);

        // Span built from the sharded scan prefix for the right statement pattern and the subject.
        final Bytes shardedPrefix = BindingHashShardingFunction.getShardedScanPrefix(rightSpId, subjectUri);
        final Span rightSpan = Span.prefix(shardedPrefix);

        final JoinBatchInformation batch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(rightSpan)
                .setTask(Task.Add)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(leftVisibilityBindings)
                .build();

        // Submit the batch, then verify the end results match the expected counts.
        createSpanBatch(fluoClient, joinId, batch);
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, nodeIds, Arrays.asList(5, 5, 5, 0, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 24 with PrecomputedJoinStorage

use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.

the class RyaInputIncrementalUpdateIT method historicAndStreamMultiVariables.

/**
 * Verifies that a PCJ with two projected variables contains results derived
 * from both the triples loaded into Rya before the PCJ was created (historic)
 * and the triples added through the Rya connection afterwards (streamed).
 *
 * The Rya repository connection opened by this test is closed in a
 * {@code finally} block so that it is released even when an assertion or an
 * intermediate step fails.
 *
 * @throws Exception if any step of the test fails
 */
@Test
public void historicAndStreamMultiVariables() throws Exception {
    // A query that finds people who talk to other people and work at Chipotle.
    final String sparql = "SELECT ?x ?y " + "WHERE { " + "?x <http://talksTo> ?y. " + "?x <http://worksAt> <http://Chipotle>." + "}";
    // Triples that are loaded into Rya before the PCJ is created.
    final ValueFactory vf = new ValueFactoryImpl();
    final Set<Statement> historicTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
    // Triples that will be streamed into Fluo after the PCJ has been created.
    final Set<Statement> streamedTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://talksTo"), vf.createURI("http://Betty")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://talksTo"), vf.createURI("http://Alice")), vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
    final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
    try {
        // Load the historic data into Rya.
        for (final Statement triple : historicTriples) {
            ryaConn.add(triple);
        }
        // Create the PCJ table.
        final Connector accumuloConn = super.getAccumuloConnector();
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);
        try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
            // Tell the Fluo app to maintain the PCJ.
            new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());
            super.getMiniFluo().waitForObservers();
            // Load the streaming data into Rya.
            for (final Statement triple : streamedTriples) {
                ryaConn.add(triple);
            }
            super.getMiniFluo().waitForObservers();
            // Expect one result each for Alice (historic data only), Frank
            // (streamed data only) and Joe (historic worksAt + streamed talksTo).
            final Set<BindingSet> expected = new HashSet<>();
            MapBindingSet bs = new MapBindingSet();
            bs.addBinding("x", vf.createURI("http://Alice"));
            bs.addBinding("y", vf.createURI("http://Eve"));
            expected.add(bs);
            bs = new MapBindingSet();
            bs.addBinding("x", vf.createURI("http://Frank"));
            bs.addBinding("y", vf.createURI("http://Betty"));
            expected.add(bs);
            bs = new MapBindingSet();
            bs.addBinding("x", vf.createURI("http://Joe"));
            bs.addBinding("y", vf.createURI("http://Alice"));
            expected.add(bs);
            // Collect everything currently stored for the PCJ and compare.
            final Set<BindingSet> results = new HashSet<>();
            try (CloseableIterator<BindingSet> resultIt = pcjStorage.listResults(pcjId)) {
                while (resultIt.hasNext()) {
                    results.add(resultIt.next());
                }
            }
            assertEquals(expected, results);
        }
    } finally {
        // Release the repository connection regardless of the test outcome.
        ryaConn.close();
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) MapBindingSet(org.openrdf.query.impl.MapBindingSet) BindingSet(org.openrdf.query.BindingSet) FluoClient(org.apache.fluo.api.client.FluoClient) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) Statement(org.openrdf.model.Statement) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) ValueFactory(org.openrdf.model.ValueFactory) SailRepositoryConnection(org.openrdf.repository.sail.SailRepositoryConnection) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) MapBindingSet(org.openrdf.query.impl.MapBindingSet) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 25 with PrecomputedJoinStorage

use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.

the class ListQueriesCommand method execute.

/**
 * Prints a report of the PCJ queries that Fluo is maintaining.
 *
 * The command line arguments are parsed into a {@code Parameters} object.
 * When a query id was supplied, only that query's metadata is fetched;
 * otherwise the metadata for every query returned by {@code GetPcjMetadata}
 * is fetched. The outcome is written to standard output.
 *
 * @param accumulo the Accumulo connector used to build the PCJ storage; must not be null
 * @param ryaTablePrefix the Rya table prefix handed to the PCJ storage
 * @param rya the Rya repository (not used by this command)
 * @param fluo the Fluo client used to look up query metadata; must not be null
 * @param args the command line arguments; must not be null
 * @throws ArgumentsException if the command line arguments could not be parsed
 * @throws ExecutionException if the metadata could not be fetched or the report could not be rendered
 */
@Override
public void execute(final Connector accumulo, final String ryaTablePrefix, final RyaSailRepository rya, final FluoClient fluo, final String[] args) throws ArgumentsException, ExecutionException {
    checkNotNull(accumulo);
    checkNotNull(fluo);
    checkNotNull(args);
    log.trace("Executing the List Queries Command...");

    // Turn the raw arguments into typed parameters.
    final Parameters params = new Parameters();
    try {
        new JCommander(params, args);
    } catch (final ParameterException e) {
        throw new ArgumentsException("Could not list the queries because of invalid command line parameters.", e);
    }

    // Gather the PCJ metadata that the report will be built from.
    final GetPcjMetadata getPcjMetadata = new GetPcjMetadata();
    final Map<String, PcjMetadata> metadata = new HashMap<>();
    try {
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumulo, ryaTablePrefix);
        final String requestedQueryId = params.queryId;
        if (requestedQueryId == null) {
            // No specific query requested: report on everything.
            log.trace("Fetch the PCJ Metadata from Accumulo for all queries that are being updated by Fluo.");
            final Map<String, PcjMetadata> allMetadata = getPcjMetadata.getMetadata(pcjStorage, fluo);
            metadata.putAll(allMetadata);
        } else {
            log.trace("Fetch the PCJ Metadata from Accumulo for Query ID '" + requestedQueryId + "'.");
            final PcjMetadata queryMetadata = getPcjMetadata.getMetadata(pcjStorage, fluo, requestedQueryId);
            metadata.put(requestedQueryId, queryMetadata);
        }
    } catch (NotInFluoException | NotInAccumuloException e) {
        throw new ExecutionException("Could not fetch some of the metadata required to build the report.", e);
    }

    // Print the report, or a short notice when there is nothing to show.
    log.trace("Rendering the queries report...");
    if (!metadata.isEmpty()) {
        final PcjMetadataRenderer renderer = new PcjMetadataRenderer();
        try {
            final String report = renderer.render(metadata);
            System.out.println("The number of Queries that are being tracked by Fluo: " + metadata.size());
            System.out.println(report);
        } catch (final Exception e) {
            throw new ExecutionException("Unable to render the query metadata report for output.", e);
        }
    } else {
        System.out.println("No queries are being tracked by Fluo.");
    }
    log.trace("Finished executing the List Queries Command.");
}
Also used : NotInFluoException(org.apache.rya.indexing.pcj.fluo.api.GetPcjMetadata.NotInFluoException) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) NotInAccumuloException(org.apache.rya.indexing.pcj.fluo.api.GetPcjMetadata.NotInAccumuloException) HashMap(java.util.HashMap) ParameterException(com.beust.jcommander.ParameterException) NotInFluoException(org.apache.rya.indexing.pcj.fluo.api.GetPcjMetadata.NotInFluoException) NotInAccumuloException(org.apache.rya.indexing.pcj.fluo.api.GetPcjMetadata.NotInAccumuloException) JCommander(com.beust.jcommander.JCommander) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) PcjMetadataRenderer(org.apache.rya.indexing.pcj.fluo.client.util.PcjMetadataRenderer) ParameterException(com.beust.jcommander.ParameterException) PcjMetadata(org.apache.rya.indexing.pcj.storage.PcjMetadata) GetPcjMetadata(org.apache.rya.indexing.pcj.fluo.api.GetPcjMetadata) GetPcjMetadata(org.apache.rya.indexing.pcj.fluo.api.GetPcjMetadata)

Aggregations

PrecomputedJoinStorage (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage): 55; AccumuloPcjStorage (org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage): 44; Test (org.junit.Test): 41; Connector (org.apache.accumulo.core.client.Connector): 25; BindingSet (org.openrdf.query.BindingSet): 23; MapBindingSet (org.openrdf.query.impl.MapBindingSet): 22; FluoClient (org.apache.fluo.api.client.FluoClient): 21; CreateFluoPcj (org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj): 20; HashSet (java.util.HashSet): 18; RyaStatement (org.apache.rya.api.domain.RyaStatement): 15; PcjMetadata (org.apache.rya.indexing.pcj.storage.PcjMetadata): 15; ValueFactory (org.openrdf.model.ValueFactory): 14; RyaURI (org.apache.rya.api.domain.RyaURI): 12; InsertTriples (org.apache.rya.indexing.pcj.fluo.api.InsertTriples): 11; VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet): 10; PCJDetails (org.apache.rya.api.instance.RyaDetails.PCJIndexDetails.PCJDetails): 9; ShiftVarOrderFactory (org.apache.rya.indexing.pcj.storage.accumulo.ShiftVarOrderFactory): 9; VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder): 9; ValueFactoryImpl (org.openrdf.model.impl.ValueFactoryImpl): 9; MalformedQueryException (org.openrdf.query.MalformedQueryException): 8