Example use of org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation in the Apache incubator-rya project.
From class BatchIT, method simpleJoinAdd.
/**
 * Loads only the <urn:predicate_2> statements into Fluo, then submits an
 * "Add" join batch for a single left-hand binding set and checks the node
 * counts before and after the batch has been processed.
 */
@Test
public void simpleJoinAdd() throws Exception {
    final String query = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";

    try (FluoClient client = new FluoClientImpl(getFluoConfiguration())) {
        // Five statements derived from a <urn:subject_1> <urn:predicate_2> template.
        final RyaStatement rightTemplate = new RyaStatement(new RyaURI("urn:subject_1"), new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> rightStatements = getRyaStatements(rightTemplate, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage storage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = storage.createPcj(query);

        // Tell the Fluo app to maintain the PCJ and look up the IDs of the query's nodes.
        final String queryId = new CreateFluoPcj()
                .withRyaIntegration(pcjId, storage, client, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();
        final List<String> nodeIds = getNodeIdStrings(client, queryId);
        final String joinNodeId = nodeIds.get(2);
        final String rightSpNodeId = nodeIds.get(4);

        // The binding set that the batch will treat as the new left-hand result.
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", vf.createURI("urn:subject_1"));
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet leftResult = new VisibilityBindingSet(leftBindings);

        // Sharded scan prefix over the right statement pattern's rows for urn:subject_1.
        final Span rightSpan = Span.prefix(
                BindingHashShardingFunction.getShardedScanPrefix(rightSpNodeId, vf.createURI("urn:subject_1")));

        // Stream the data into Fluo and check the counts before any batch runs.
        new InsertTriples().insert(client, rightStatements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(0, 0, 0, 0, 5));

        final JoinBatchInformation addBatch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(rightSpan)
                .setTask(Task.Add)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(leftResult)
                .build();

        // Register the batch on the join node, let it run, then verify the end results.
        createSpanBatch(client, joinNodeId, addBatch);
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(5, 5, 5, 0, 5));
    }
}
Example use of org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation in the Apache incubator-rya project.
From class JoinResultUpdater, method updateJoinResults.
/**
 * Updates the results of a Join node when one of its children has added a
 * new Binding Set to its results.
 * <p>
 * The new child Binding Set is joined against a batch of the sibling node's
 * Binding Sets, and each join result is written to the join node's
 * {@link FluoQueryColumns#JOIN_BINDING_SET} column. If filling the sibling
 * batch reports a {@link RowColumn}, a follow-up {@link JoinBatchInformation}
 * task is registered for the remaining span (presumably because the batch
 * size limit was reached before the whole span had been read).
 * <p>
 * Any {@code childNodeId} that does not equal the join's left child ID is
 * treated as the right child.
 *
 * @param tx - The transaction all Fluo queries will use. (not null)
 * @param childNodeId - The Node ID of the child whose results received a new Binding Set. (not null)
 * @param childBindingSet - The Binding Set that was just emitted by child node. (not null)
 * @param joinMetadata - The metadata for the Join that has been notified. (not null)
 * @throws RuntimeException The join's type is neither a natural join nor a left outer join.
 * @throws Exception The update could not be successfully performed.
 */
public void updateJoinResults(final TransactionBase tx, final String childNodeId, final VisibilityBindingSet childBindingSet, final JoinMetadata joinMetadata) throws Exception {
    checkNotNull(tx);
    checkNotNull(childNodeId);
    checkNotNull(childBindingSet);
    checkNotNull(joinMetadata);

    // Building the trace messages stringifies whole binding sets, so only do
    // that work when trace logging is actually enabled.
    final boolean traceEnabled = log.isTraceEnabled();
    if (traceEnabled) {
        log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "Join Node ID: " + joinMetadata.getNodeId() + "\n" + "Child Node ID: " + childNodeId + "\n" + "Child Binding Set:\n" + childBindingSet + "\n");
    }

    // Figure out which join algorithm we are going to use.
    final IterativeJoin joinAlgorithm;
    switch (joinMetadata.getJoinType()) {
        case NATURAL_JOIN:
            joinAlgorithm = new NaturalJoin();
            break;
        case LEFT_OUTER_JOIN:
            joinAlgorithm = new LeftOuterJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinMetadata.getJoinType());
    }

    // Figure out which side of the join the new binding set appeared on.
    final Side emittingSide;
    final String siblingId;
    if (childNodeId.equals(joinMetadata.getLeftChildNodeId())) {
        emittingSide = Side.LEFT;
        siblingId = joinMetadata.getRightChildNodeId();
    } else {
        emittingSide = Side.RIGHT;
        siblingId = joinMetadata.getLeftChildNodeId();
    }

    // Collect a batch of the sibling node's BindingSets that join with the new binding set.
    final Set<VisibilityBindingSet> siblingBindingSets = new HashSet<>();
    final Span siblingSpan = getSpan(tx, childNodeId, childBindingSet, siblingId);
    final Column siblingColumn = getScanColumnFamily(siblingId);
    final Optional<RowColumn> rowColumn = fillSiblingBatch(tx, siblingSpan, siblingColumn, siblingBindingSets, joinMetadata.getJoinBatchSize());

    // Iterates over the resulting BindingSets from the join. The argument
    // order depends on which side of the join emitted the new binding set.
    final Iterator<VisibilityBindingSet> newJoinResults;
    if (emittingSide == Side.LEFT) {
        newJoinResults = joinAlgorithm.newLeftResult(childBindingSet, siblingBindingSets.iterator());
    } else {
        newJoinResults = joinAlgorithm.newRightResult(siblingBindingSets.iterator(), childBindingSet);
    }

    // Insert the new join binding sets to the Fluo table.
    final VariableOrder joinVarOrder = joinMetadata.getVariableOrder();
    while (newJoinResults.hasNext()) {
        final VisibilityBindingSet newJoinResult = newJoinResults.next();

        // Create the Row Key for the emitted binding set. It does not contain visibilities.
        final Bytes resultRow = makeRowKey(joinMetadata.getNodeId(), joinVarOrder, newJoinResult);

        // Only insert the join Binding Set if it is new or BindingSet contains values not used in resultRow.
        if (tx.get(resultRow, FluoQueryColumns.JOIN_BINDING_SET) == null || joinVarOrder.getVariableOrders().size() < newJoinResult.size()) {
            // Create the Node Value. It does contain visibilities.
            final Bytes nodeValueBytes = BS_SERDE.serialize(newJoinResult);

            if (traceEnabled) {
                log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "New Join Result:\n" + newJoinResult + "\n");
            }

            tx.set(resultRow, FluoQueryColumns.JOIN_BINDING_SET, nodeValueBytes);
        }
    }

    // Update the span and register a follow-up batch job for the rest of the sibling's results.
    if (rowColumn.isPresent()) {
        final Span newSpan = AbstractBatchBindingSetUpdater.getNewSpan(rowColumn.get(), siblingSpan);
        final JoinBatchInformation joinBatch = JoinBatchInformation.builder()
                .setBatchSize(joinMetadata.getJoinBatchSize())
                .setBs(childBindingSet)
                .setColumn(siblingColumn)
                .setJoinType(joinMetadata.getJoinType())
                .setSide(emittingSide)
                .setSpan(newSpan)
                .setTask(Task.Add)
                .build();
        BatchInformationDAO.addBatch(tx, joinMetadata.getNodeId(), joinBatch);
    }
}
Example use of org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation in the Apache incubator-rya project.
From class BatchIT, method simpleJoinDelete.
/**
 * Loads both the <urn:predicate_1> and <urn:predicate_2> statements into
 * Fluo, then submits a "Delete" join batch for a single left-hand binding
 * set and checks the node counts before and after the batch has been
 * processed.
 */
@Test
public void simpleJoinDelete() throws Exception {
    final String query = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";

    try (FluoClient client = new FluoClientImpl(getFluoConfiguration())) {
        // Five statements per predicate, all sharing the subject urn:subject_1.
        final RyaURI subject = new RyaURI("urn:subject_1");
        final RyaStatement leftTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_1"), null);
        final RyaStatement rightTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> leftStatements = getRyaStatements(leftTemplate, 5);
        final Set<RyaStatement> rightStatements = getRyaStatements(rightTemplate, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage storage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = storage.createPcj(query);

        // Tell the Fluo app to maintain the PCJ and look up the IDs of the query's nodes.
        final String queryId = new CreateFluoPcj()
                .withRyaIntegration(pcjId, storage, client, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();
        final List<String> nodeIds = getNodeIdStrings(client, queryId);
        final String joinNodeId = nodeIds.get(2);
        final String rightSpNodeId = nodeIds.get(4);

        // The left-hand binding set whose join results the batch should remove.
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", vf.createURI("urn:subject_1"));
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet leftResult = new VisibilityBindingSet(leftBindings);

        // Create the sharded span for deletion.
        final Span rightSpan = Span.prefix(
                BindingHashShardingFunction.getShardedScanPrefix(rightSpNodeId, vf.createURI("urn:subject_1")));

        // Stream the data into Fluo and check the counts before the delete batch runs.
        final InsertTriples tripleInserter = new InsertTriples();
        tripleInserter.insert(client, leftStatements, Optional.absent());
        tripleInserter.insert(client, rightStatements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(25, 25, 25, 5, 5));

        final JoinBatchInformation deleteBatch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(rightSpan)
                .setTask(Task.Delete)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(leftResult)
                .build();

        // Register the batch on the join node, let it run, then verify the end results.
        createSpanBatch(client, joinNodeId, deleteBatch);
        getMiniFluo().waitForObservers();
        verifyCounts(client, nodeIds, Arrays.asList(25, 25, 20, 5, 5));
    }
}
Example use of org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation in the Apache incubator-rya project.
From class BatchInformationSerializerTest, method testJoinBatchInformationSerialization.
/**
 * Serializes a {@link JoinBatchInformation} with {@link BatchInformationSerializer}
 * and asserts that decoding the bytes yields an equal object.
 */
@Test
public void testJoinBatchInformationSerialization() {
    // Binding set carrying a visibility expression, embedded in the batch.
    final QueryBindingSet bindings = new QueryBindingSet();
    bindings.addBinding("a", new URIImpl("urn:123"));
    bindings.addBinding("b", new URIImpl("urn:456"));
    final VisibilityBindingSet visibilityBindings = new VisibilityBindingSet(bindings, "FOUO");

    final JoinBatchInformation expected = JoinBatchInformation.builder()
            .setBatchSize(1000)
            .setTask(Task.Update)
            .setColumn(FluoQueryColumns.PERIODIC_QUERY_BINDING_SET)
            .setSpan(Span.prefix(Bytes.of("prefix346")))
            .setJoinType(JoinType.LEFT_OUTER_JOIN)
            .setSide(Side.RIGHT)
            .setBs(visibilityBindings)
            .build();

    // Round trip: object -> bytes -> object.
    final Optional<BatchInformation> roundTripped =
            BatchInformationSerializer.fromBytes(BatchInformationSerializer.toBytes(expected));

    assertEquals(expected, roundTripped.get());
}
Aggregations