use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class BatchIT method leftJoinBatchIntegrationTest.
@Test
public void leftJoinBatchIntegrationTest() throws Exception {
final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + "OPTIONAL{ ?subject <urn:predicate_2> ?object2} } ";
try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
RyaURI subj = new RyaURI("urn:subject_1");
RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
subj = new RyaURI("urn:subject_2");
RyaStatement statement3 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
Set<RyaStatement> statements1 = getRyaStatements(statement1, 10);
Set<RyaStatement> statements2 = getRyaStatements(statement2, 10);
Set<RyaStatement> statements3 = getRyaStatements(statement3, 10);
// Create the PCJ table.
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
// Tell the Fluo app to maintain the PCJ and sets batch scan size for StatementPatterns to 5 and
// batch size of joins to 5.
String queryId = new CreateFluoPcj(5, 5).withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
List<String> ids = getNodeIdStrings(fluoClient, queryId);
// Stream the data into Fluo.
InsertTriples inserter = new InsertTriples();
inserter.insert(fluoClient, statements1, Optional.absent());
inserter.insert(fluoClient, statements2, Optional.absent());
inserter.insert(fluoClient, statements3, Optional.absent());
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(110, 110, 110, 20, 10));
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class BatchIT method joinBatchIntegrationTest.
@Test
public void joinBatchIntegrationTest() throws Exception {
final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
RyaURI subj = new RyaURI("urn:subject_1");
RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
Set<RyaStatement> statements1 = getRyaStatements(statement1, 15);
Set<RyaStatement> statements2 = getRyaStatements(statement2, 15);
// Create the PCJ table.
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
// Tell the Fluo app to maintain the PCJ and sets batch scan size for StatementPatterns to 5 and
// batch size of joins to 5.
String queryId = new CreateFluoPcj(5, 5).withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
List<String> ids = getNodeIdStrings(fluoClient, queryId);
// Stream the data into Fluo.
InsertTriples inserter = new InsertTriples();
inserter.insert(fluoClient, statements1, Optional.absent());
inserter.insert(fluoClient, statements2, Optional.absent());
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(225, 225, 225, 15, 15));
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class BatchIT method simpleJoinAdd.
@Test
public void simpleJoinAdd() throws Exception {
final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
RyaURI subj = new RyaURI("urn:subject_1");
RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
Set<RyaStatement> statements2 = getRyaStatements(statement2, 5);
// Create the PCJ table.
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
// Tell the Fluo app to maintain the PCJ.
String queryId = new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
List<String> ids = getNodeIdStrings(fluoClient, queryId);
String joinId = ids.get(2);
String rightSp = ids.get(4);
QueryBindingSet bs = new QueryBindingSet();
bs.addBinding("subject", vf.createURI("urn:subject_1"));
bs.addBinding("object1", vf.createURI("urn:object_0"));
VisibilityBindingSet vBs = new VisibilityBindingSet(bs);
URI uri = vf.createURI("urn:subject_1");
Bytes prefixBytes = BindingHashShardingFunction.getShardedScanPrefix(rightSp, uri);
Span span = Span.prefix(prefixBytes);
// Stream the data into Fluo.
InsertTriples inserter = new InsertTriples();
inserter.insert(fluoClient, statements2, Optional.absent());
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(0, 0, 0, 0, 5));
JoinBatchInformation batch = JoinBatchInformation.builder().setBatchSize(1).setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET).setSpan(span).setTask(Task.Add).setJoinType(JoinType.NATURAL_JOIN).setSide(Side.LEFT).setBs(vBs).build();
// Verify the end results of the query match the expected results.
createSpanBatch(fluoClient, joinId, batch);
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(5, 5, 5, 0, 5));
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class RyaInputIncrementalUpdateIT method historicAndStreamMultiVariables.
@Test
public void historicAndStreamMultiVariables() throws Exception {
// A query that finds people who talk to other people and work at Chipotle.
final String sparql = "SELECT ?x ?y " + "WHERE { " + "?x <http://talksTo> ?y. " + "?x <http://worksAt> <http://Chipotle>." + "}";
// Triples that are loaded into Rya before the PCJ is created.
final ValueFactory vf = new ValueFactoryImpl();
final Set<Statement> historicTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://talksTo"), vf.createURI("http://Eve")), vf.createStatement(vf.createURI("http://Alice"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
// Triples that will be streamed into Fluo after the PCJ has been
final Set<Statement> streamedTriples = Sets.newHashSet(vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://talksTo"), vf.createURI("http://Betty")), vf.createStatement(vf.createURI("http://Joe"), vf.createURI("http://talksTo"), vf.createURI("http://Alice")), vf.createStatement(vf.createURI("http://Frank"), vf.createURI("http://worksAt"), vf.createURI("http://Chipotle")));
// Load the historic data into Rya.
final SailRepositoryConnection ryaConn = super.getRyaSailRepository().getConnection();
for (final Statement triple : historicTriples) {
ryaConn.add(triple);
}
// Create the PCJ table.
final Connector accumuloConn = super.getAccumuloConnector();
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumuloConn, getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
try (FluoClient fluoClient = FluoFactory.newClient(super.getFluoConfiguration())) {
// Tell the Fluo app to maintain the PCJ.
new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, accumuloConn, getRyaInstanceName());
super.getMiniFluo().waitForObservers();
// Load the streaming data into Rya.
for (final Statement triple : streamedTriples) {
ryaConn.add(triple);
}
// Ensure Alice is a match.
super.getMiniFluo().waitForObservers();
final Set<BindingSet> expected = new HashSet<>();
MapBindingSet bs = new MapBindingSet();
bs.addBinding("x", vf.createURI("http://Alice"));
bs.addBinding("y", vf.createURI("http://Eve"));
expected.add(bs);
bs = new MapBindingSet();
bs.addBinding("x", vf.createURI("http://Frank"));
bs.addBinding("y", vf.createURI("http://Betty"));
expected.add(bs);
bs = new MapBindingSet();
bs.addBinding("x", vf.createURI("http://Joe"));
bs.addBinding("y", vf.createURI("http://Alice"));
expected.add(bs);
final Set<BindingSet> results = new HashSet<>();
try (CloseableIterator<BindingSet> resultIt = pcjStorage.listResults(pcjId)) {
while (resultIt.hasNext()) {
results.add(resultIt.next());
}
}
assertEquals(expected, results);
}
}
use of org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage in project incubator-rya by apache.
the class ListQueriesCommand method execute.
@Override
public void execute(final Connector accumulo, final String ryaTablePrefix, final RyaSailRepository rya, final FluoClient fluo, final String[] args) throws ArgumentsException, ExecutionException {
checkNotNull(accumulo);
checkNotNull(fluo);
checkNotNull(args);
log.trace("Executing the List Queries Command...");
// Parse the command line arguments.
final Parameters params = new Parameters();
try {
new JCommander(params, args);
} catch (final ParameterException e) {
throw new ArgumentsException("Could not list the queries because of invalid command line parameters.", e);
}
// Fetch the PCJ metadata that will be included in the report.
final GetPcjMetadata getPcjMetadata = new GetPcjMetadata();
final Map<String, PcjMetadata> metadata = new HashMap<String, PcjMetadata>();
try {
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(accumulo, ryaTablePrefix);
if (params.queryId != null) {
log.trace("Fetch the PCJ Metadata from Accumulo for Query ID '" + params.queryId + "'.");
metadata.put(params.queryId, getPcjMetadata.getMetadata(pcjStorage, fluo, params.queryId));
} else {
log.trace("Fetch the PCJ Metadata from Accumulo for all queries that are being updated by Fluo.");
metadata.putAll(getPcjMetadata.getMetadata(pcjStorage, fluo));
}
} catch (NotInFluoException | NotInAccumuloException e) {
throw new ExecutionException("Could not fetch some of the metadata required to build the report.", e);
}
// Write the metadata to the console.
log.trace("Rendering the queries report...");
if (metadata.isEmpty()) {
System.out.println("No queries are being tracked by Fluo.");
} else {
final PcjMetadataRenderer renderer = new PcjMetadataRenderer();
try {
final String report = renderer.render(metadata);
System.out.println("The number of Queries that are being tracked by Fluo: " + metadata.size());
System.out.println(report);
} catch (final Exception e) {
throw new ExecutionException("Unable to render the query metadata report for output.", e);
}
}
log.trace("Finished executing the List Queries Command.");
}
Aggregations