use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.
the class JoinResultUpdater method updateJoinResults.
/**
* Updates the results of a Join node when one of its children has added a
* new Binding Set to its results.
*
* @param tx - The transaction all Fluo queries will use. (not null)
* @param childNodeId - The Node ID of the child whose results received a new Binding Set. (not null)
* @param childBindingSet - The Binding Set that was just emitted by child node. (not null)
* @param joinMetadata - The metadata for the Join that has been notified. (not null)
* @throws Exception The update could not be successfully performed.
*/
public void updateJoinResults(final TransactionBase tx, final String childNodeId, final VisibilityBindingSet childBindingSet, final JoinMetadata joinMetadata) throws Exception {
checkNotNull(tx);
checkNotNull(childNodeId);
checkNotNull(childBindingSet);
checkNotNull(joinMetadata);
log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "Join Node ID: " + joinMetadata.getNodeId() + "\n" + "Child Node ID: " + childNodeId + "\n" + "Child Binding Set:\n" + childBindingSet + "\n");
// Figure out which join algorithm we are going to use.
final IterativeJoin joinAlgorithm;
switch(joinMetadata.getJoinType()) {
case NATURAL_JOIN:
joinAlgorithm = new NaturalJoin();
break;
case LEFT_OUTER_JOIN:
joinAlgorithm = new LeftOuterJoin();
break;
default:
throw new RuntimeException("Unsupported JoinType: " + joinMetadata.getJoinType());
}
// Figure out which side of the join the new binding set appeared on.
final Side emittingSide;
final String siblingId;
if (childNodeId.equals(joinMetadata.getLeftChildNodeId())) {
emittingSide = Side.LEFT;
siblingId = joinMetadata.getRightChildNodeId();
} else {
emittingSide = Side.RIGHT;
siblingId = joinMetadata.getLeftChildNodeId();
}
// Iterates over the sibling node's BindingSets that join with the new binding set.
final Set<VisibilityBindingSet> siblingBindingSets = new HashSet<>();
final Span siblingSpan = getSpan(tx, childNodeId, childBindingSet, siblingId);
final Column siblingColumn = getScanColumnFamily(siblingId);
final Optional<RowColumn> rowColumn = fillSiblingBatch(tx, siblingSpan, siblingColumn, siblingBindingSets, joinMetadata.getJoinBatchSize());
// Iterates over the resulting BindingSets from the join.
final Iterator<VisibilityBindingSet> newJoinResults;
if (emittingSide == Side.LEFT) {
newJoinResults = joinAlgorithm.newLeftResult(childBindingSet, siblingBindingSets.iterator());
} else {
newJoinResults = joinAlgorithm.newRightResult(siblingBindingSets.iterator(), childBindingSet);
}
// Insert the new join binding sets to the Fluo table.
final VariableOrder joinVarOrder = joinMetadata.getVariableOrder();
while (newJoinResults.hasNext()) {
final VisibilityBindingSet newJoinResult = newJoinResults.next();
// Create the Row Key for the emitted binding set. It does not contain visibilities.
final Bytes resultRow = makeRowKey(joinMetadata.getNodeId(), joinVarOrder, newJoinResult);
// Only insert the join Binding Set if it is new or BindingSet contains values not used in resultRow.
if (tx.get(resultRow, FluoQueryColumns.JOIN_BINDING_SET) == null || joinVarOrder.getVariableOrders().size() < newJoinResult.size()) {
// Create the Node Value. It does contain visibilities.
final Bytes nodeValueBytes = BS_SERDE.serialize(newJoinResult);
log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "New Join Result:\n" + newJoinResult + "\n");
tx.set(resultRow, FluoQueryColumns.JOIN_BINDING_SET, nodeValueBytes);
}
}
// update the span and register updated batch job
if (rowColumn.isPresent()) {
final Span newSpan = AbstractBatchBindingSetUpdater.getNewSpan(rowColumn.get(), siblingSpan);
final JoinBatchInformation joinBatch = JoinBatchInformation.builder().setBatchSize(joinMetadata.getJoinBatchSize()).setBs(childBindingSet).setColumn(siblingColumn).setJoinType(joinMetadata.getJoinType()).setSide(emittingSide).setSpan(newSpan).setTask(Task.Add).build();
BatchInformationDAO.addBatch(tx, joinMetadata.getNodeId(), joinBatch);
}
}
use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.
the class BatchIT method simpleJoinDelete.
@Test
public void simpleJoinDelete() throws Exception {
final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
RyaURI subj = new RyaURI("urn:subject_1");
RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
Set<RyaStatement> statements1 = getRyaStatements(statement1, 5);
Set<RyaStatement> statements2 = getRyaStatements(statement2, 5);
// Create the PCJ table.
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
// Tell the Fluo app to maintain the PCJ.
String queryId = new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
List<String> ids = getNodeIdStrings(fluoClient, queryId);
String joinId = ids.get(2);
String rightSp = ids.get(4);
QueryBindingSet bs = new QueryBindingSet();
bs.addBinding("subject", vf.createURI("urn:subject_1"));
bs.addBinding("object1", vf.createURI("urn:object_0"));
VisibilityBindingSet vBs = new VisibilityBindingSet(bs);
// create sharded span for deletion
URI uri = vf.createURI("urn:subject_1");
Bytes prefixBytes = BindingHashShardingFunction.getShardedScanPrefix(rightSp, uri);
Span span = Span.prefix(prefixBytes);
// Stream the data into Fluo.
InsertTriples inserter = new InsertTriples();
inserter.insert(fluoClient, statements1, Optional.absent());
inserter.insert(fluoClient, statements2, Optional.absent());
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 25, 5, 5));
JoinBatchInformation batch = JoinBatchInformation.builder().setBatchSize(1).setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET).setSpan(span).setTask(Task.Delete).setJoinType(JoinType.NATURAL_JOIN).setSide(Side.LEFT).setBs(vBs).build();
// Verify the end results of the query match the expected results.
createSpanBatch(fluoClient, joinId, batch);
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 20, 5, 5));
}
}
use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.
the class TripleObserver method process.
@Override
public void process(final TransactionBase tx, final Bytes brow, final Column column) {
// Get string representation of triple.
final RyaStatement ryaStatement = IncUpdateDAO.deserializeTriple(brow);
log.trace("Transaction ID: {}\nRya Statement: {}\n", tx.getStartTimestamp(), ryaStatement);
log.trace("Beginging to process triple.");
final String triple = IncUpdateDAO.getTripleString(ryaStatement);
Set<String> spIDs = SP_ID_CACHE.getStatementPatternIds(tx);
// see if triple matches conditions of any of the SP
for (String spID : spIDs) {
// Fetch its metadata.
final StatementPatternMetadata spMetadata = QUERY_METADATA_DAO.readStatementPatternMetadata(tx, spID);
log.trace("Retrieved metadata: {}", spMetadata);
// Attempt to match the triple against the pattern.
final String pattern = spMetadata.getStatementPattern();
final VariableOrder varOrder = spMetadata.getVariableOrder();
final String bindingSetString = getBindingSet(triple, pattern, varOrder);
log.trace("Created binding set match string: {}", bindingSetString);
// Statement matches to a binding set.
if (bindingSetString.length() != 0) {
// Fetch the triple's visibility label.
final String visibility = tx.gets(brow.toString(), FluoQueryColumns.TRIPLES, "");
// Make BindingSet and sharded row
final VisibilityBindingSet visBindingSet = VIS_BS_CONVERTER.convert(bindingSetString, varOrder);
visBindingSet.setVisibility(visibility);
Bytes row = BindingHashShardingFunction.addShard(spID, varOrder, visBindingSet);
// If this is a new Binding Set, then emit it.
if (tx.get(row, FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET) == null) {
try {
final Bytes valueBytes = BS_SERDE.serialize(visBindingSet);
log.trace("Transaction ID: {}\nMatched Statement Pattern: {}\nBinding Set: {}\n", tx.getStartTimestamp(), spID, visBindingSet);
tx.set(row, FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET, valueBytes);
} catch (final Exception e) {
log.error("Couldn't serialize a Binding Set. This value will be skipped.", e);
}
}
}
}
// Once the triple has been handled, it may be deleted.
tx.delete(brow, column);
}
use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.
the class FluoQueryMetadataDAO method readAggregationMetadataBuilder.
private AggregationMetadata.Builder readAggregationMetadataBuilder(final SnapshotBase sx, final String nodeId) {
requireNonNull(sx);
requireNonNull(nodeId);
// Fetch the values from the Fluo table.
final String rowId = nodeId;
final Map<Column, String> values = sx.gets(rowId, FluoQueryColumns.AGGREGATION_VARIABLE_ORDER, FluoQueryColumns.AGGREGATION_PARENT_NODE_ID, FluoQueryColumns.AGGREGATION_CHILD_NODE_ID, FluoQueryColumns.AGGREGATION_GROUP_BY_BINDING_NAMES);
// Return an object holding them.
final String varOrderString = values.get(FluoQueryColumns.AGGREGATION_VARIABLE_ORDER);
final VariableOrder varOrder = new VariableOrder(varOrderString);
final String parentNodeId = values.get(FluoQueryColumns.AGGREGATION_PARENT_NODE_ID);
final String childNodeId = values.get(FluoQueryColumns.AGGREGATION_CHILD_NODE_ID);
// Read the Group By variable order if one was present.
final String groupByString = values.get(FluoQueryColumns.AGGREGATION_GROUP_BY_BINDING_NAMES);
final VariableOrder groupByVars = groupByString.isEmpty() ? new VariableOrder() : new VariableOrder(groupByString.split(";"));
// Deserialize the collection of AggregationElements.
final Bytes aggBytes = sx.get(Bytes.of(nodeId.getBytes(Charsets.UTF_8)), FluoQueryColumns.AGGREGATION_AGGREGATIONS);
final Collection<AggregationElement> aggregations;
try (final ValidatingObjectInputStream vois = new ValidatingObjectInputStream(aggBytes.toInputStream())) // // this is how you find classes that you missed in the vois.accept() list, below.
// { @Override protected void invalidClassNameFound(String className) throws java.io.InvalidClassException {
// System.out.println("vois.accept(" + className + ".class, ");};};
{
// These classes are allowed to be deserialized. Others throw InvalidClassException.
vois.accept(java.util.ArrayList.class, java.lang.Enum.class, AggregationElement.class, AggregationType.class);
final Object object = vois.readObject();
if (!(object instanceof Collection<?>)) {
throw new InvalidClassException("Object read was not of type Collection. It was: " + object.getClass());
}
aggregations = (Collection<AggregationElement>) object;
} catch (final IOException | ClassNotFoundException e) {
throw new RuntimeException("Problem encountered while reading AggregationMetadata from the Fluo table. Unable " + "to deserialize the AggregationElements from a byte[].", e);
}
final AggregationMetadata.Builder builder = AggregationMetadata.builder(nodeId).setVarOrder(varOrder).setParentNodeId(parentNodeId).setChildNodeId(childNodeId).setGroupByVariableOrder(groupByVars);
for (final AggregationElement aggregation : aggregations) {
builder.addAggregation(aggregation);
}
return builder;
}
use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.
the class PeriodicNotificationBinPrunerIT method compareFluoCounts.
private void compareFluoCounts(FluoClient client, String pcjId, long bin) {
QueryBindingSet bs = new QueryBindingSet();
bs.addBinding(IncrementalUpdateConstants.PERIODIC_BIN_ID, new LiteralImpl(Long.toString(bin), XMLSchema.LONG));
VariableOrder varOrder = new VariableOrder(IncrementalUpdateConstants.PERIODIC_BIN_ID);
try (Snapshot sx = client.newSnapshot()) {
String fluoQueryId = NodeType.generateNewIdForType(NodeType.QUERY, pcjId);
Set<String> ids = new HashSet<>();
PeriodicQueryUtil.getPeriodicQueryNodeAncestorIds(sx, fluoQueryId, ids);
for (String id : ids) {
NodeType optNode = NodeType.fromNodeId(id).orNull();
if (optNode == null)
throw new RuntimeException("Invalid NodeType.");
Bytes prefix = RowKeyUtil.makeRowKey(id, varOrder, bs);
RowScanner scanner = sx.scanner().fetch(optNode.getResultColumn()).over(Span.prefix(prefix)).byRow().build();
int count = 0;
Iterator<ColumnScanner> colScannerIter = scanner.iterator();
while (colScannerIter.hasNext()) {
ColumnScanner colScanner = colScannerIter.next();
String row = colScanner.getRow().toString();
Iterator<ColumnValue> values = colScanner.iterator();
while (values.hasNext()) {
values.next();
count++;
}
}
Assert.assertEquals(0, count);
}
}
}
Aggregations