use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.
the class JoinBatchBindingSetUpdater method processBatch.
/**
 * Applies a single {@link JoinBatchInformation} job to the join node whose
 * id is encoded in the batch row.
 * <p>
 * The sibling BindingSets selected by the batch are joined against the
 * batch's own BindingSet (as the left or right operand, depending on
 * {@link JoinBatchInformation#getSide()}), and every join result is handed
 * to {@code processTask} together with the {@link Task} reported by
 * {@link JoinBatchInformation#getTask()}, which decides whether results are
 * Added, Deleted, or Updated. Work is done in batches whose size is given by
 * {@link JoinBatchInformation#getBatchSize()}; when sibling entries remain
 * after this batch, the batch's Span is advanced to the first unprocessed
 * row and the batch is registered again so the remainder is handled by a
 * follow-up job.
 *
 * @param tx - Fluo transaction used for all reads and writes
 * @param row - batch row from which the join node id is extracted
 * @param batch - the batch to process; must be a {@link JoinBatchInformation}
 * @throws IllegalArgumentException if {@code batch} is not a {@link JoinBatchInformation}
 * @throws RuntimeException if the batch's join type is not supported
 * @throws Exception if the batch could not be processed
 */
@Override
public void processBatch(final TransactionBase tx, final Bytes row, final BatchInformation batch) throws Exception {
    super.processBatch(tx, row, batch);

    final String nodeId = BatchRowKeyUtil.getNodeId(row);
    Preconditions.checkArgument(batch instanceof JoinBatchInformation);
    final JoinBatchInformation joinInfo = (JoinBatchInformation) batch;
    final Task batchTask = joinInfo.getTask();

    // Choose the join strategy that matches the batch's join type.
    final IterativeJoin join;
    switch (joinInfo.getJoinType()) {
        case LEFT_OUTER_JOIN:
            join = new LeftOuterJoin();
            break;
        case NATURAL_JOIN:
            join = new NaturalJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinInfo.getJoinType());
    }

    // Collect this batch's sibling BindingSets. A present result marks where
    // a follow-up batch has to resume.
    final Set<VisibilityBindingSet> siblings = new HashSet<>();
    final Optional<RowColumn> resumePoint = fillSiblingBatch(tx, joinInfo, siblings);

    // The batch's BindingSet is the left or right join operand, depending on
    // which side of the join emitted it.
    final VisibilityBindingSet batchBs = joinInfo.getBs();
    final Iterator<VisibilityBindingSet> joined = joinInfo.getSide() == Side.LEFT
            ? join.newLeftResult(batchBs, siblings.iterator())
            : join.newRightResult(siblings.iterator(), batchBs);

    // The join's variable order determines how result rows are keyed.
    final VariableOrder varOrder = CACHE.readJoinMetadata(tx, nodeId).getVariableOrder();
    while (joined.hasNext()) {
        final VisibilityBindingSet result = joined.next();
        // Value first, then the sharded row id under which it is stored.
        final Bytes serializedResult = BS_SERDE.serialize(result);
        final Bytes shardedRow = BindingHashShardingFunction.addShard(nodeId, varOrder, result);
        processTask(tx, batchTask, shardedRow, FluoQueryColumns.JOIN_BINDING_SET, serializedResult);
    }

    // More siblings remain: advance the span and re-register the batch job.
    if (resumePoint.isPresent()) {
        joinInfo.setSpan(getNewSpan(resumePoint.get(), joinInfo.getSpan()));
        BatchInformationDAO.addBatch(tx, nodeId, joinInfo);
    }
}
use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.
the class SpanBatchBindingSetUpdater method processBatch.
/**
 * Processes a {@link SpanBatchDeleteInformation} by deleting the entries its
 * Span indicates, until the batch limit is met.
 * <p>
 * Only the Delete task triggers any work; Add, Update and unrecognized tasks
 * are merely reported at trace level and leave the batch unprocessed. When
 * the delete reports a position at which entries remain, the batch's Span is
 * advanced to that position and the batch is registered again under the
 * node id extracted from {@code row}.
 *
 * @param tx - Fluo Transaction
 * @param row - Byte row identifying BatchInformation
 * @param batch - SpanBatchDeleteInformation object to be processed
 * @throws IllegalArgumentException if {@code batch} is not a {@link SpanBatchDeleteInformation}
 * @throws Exception if the batch could not be processed
 */
@Override
public void processBatch(TransactionBase tx, Bytes row, BatchInformation batch) throws Exception {
    super.processBatch(tx, row, batch);
    Preconditions.checkArgument(batch instanceof SpanBatchDeleteInformation);
    SpanBatchDeleteInformation deleteInfo = (SpanBatchDeleteInformation) batch;
    Column targetColumn = batch.getColumn();

    // Present only when a delete ran and left entries behind.
    final Optional<RowColumn> resumePoint;
    switch (deleteInfo.getTask()) {
        case Delete:
            resumePoint = deleteBatch(tx, deleteInfo.getNodeId(), deleteInfo.getSpan(), targetColumn, deleteInfo.getBatchSize());
            break;
        case Add:
            log.trace("The Task Add is not supported for SpanBatchBindingSetUpdater. Batch " + batch + " will not be processed.");
            resumePoint = Optional.empty();
            break;
        case Update:
            log.trace("The Task Update is not supported for SpanBatchBindingSetUpdater. Batch " + batch + " will not be processed.");
            resumePoint = Optional.empty();
            break;
        default:
            log.trace("Invalid Task type. Aborting batch operation.");
            resumePoint = Optional.empty();
            break;
    }

    if (!resumePoint.isPresent()) {
        // Nothing left to delete (or nothing was attempted).
        return;
    }

    // Shrink the span to the unprocessed remainder and queue it as a new batch.
    Span remaining = getNewSpan(resumePoint.get(), deleteInfo.getSpan());
    log.trace("Batch size met. There are remaining results that need to be deleted. Creating a new batch of size: " + deleteInfo.getBatchSize() + " with Span: " + remaining + " and Column: " + targetColumn);
    deleteInfo.setSpan(remaining);
    BatchInformationDAO.addBatch(tx, BatchRowKeyUtil.getNodeId(row), deleteInfo);
}
use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.
the class JoinResultUpdater method updateJoinResults.
/**
 * Updates the results of a Join node when one of its children has added a
 * new Binding Set to its results.
 * <p>
 * The new Binding Set is joined against a batch of the sibling child's
 * Binding Sets (sized by {@link JoinMetadata#getJoinBatchSize()}). Each join
 * result is written to the join node's row unless a value already exists
 * there and the result carries no bindings beyond the join's variable order.
 * If sibling entries remain after this batch, a {@link JoinBatchInformation}
 * job with task {@code Add} is registered to process the remainder.
 * <p>
 * A {@code childNodeId} that does not equal the join's left child id is
 * treated as the right child; it is not validated further.
 *
 * @param tx - The transaction all Fluo queries will use. (not null)
 * @param childNodeId - The Node ID of the child whose results received a new Binding Set. (not null)
 * @param childBindingSet - The Binding Set that was just emitted by child node. (not null)
 * @param joinMetadata - The metadata for the Join that has been notified. (not null)
 * @throws RuntimeException The join type in {@code joinMetadata} is not supported.
 * @throws Exception The update could not be successfully performed.
 */
public void updateJoinResults(final TransactionBase tx, final String childNodeId, final VisibilityBindingSet childBindingSet, final JoinMetadata joinMetadata) throws Exception {
    checkNotNull(tx);
    checkNotNull(childNodeId);
    checkNotNull(childBindingSet);
    checkNotNull(joinMetadata);

    // The trace messages stringify whole binding sets; only pay for that
    // when trace logging is actually enabled.
    final boolean tracing = log.isTraceEnabled();
    if (tracing) {
        log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "Join Node ID: " + joinMetadata.getNodeId() + "\n" + "Child Node ID: " + childNodeId + "\n" + "Child Binding Set:\n" + childBindingSet + "\n");
    }

    // Figure out which join algorithm we are going to use.
    final IterativeJoin joinAlgorithm;
    switch (joinMetadata.getJoinType()) {
        case NATURAL_JOIN:
            joinAlgorithm = new NaturalJoin();
            break;
        case LEFT_OUTER_JOIN:
            joinAlgorithm = new LeftOuterJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinMetadata.getJoinType());
    }

    // Figure out which side of the join the new binding set appeared on.
    final Side emittingSide;
    final String siblingId;
    if (childNodeId.equals(joinMetadata.getLeftChildNodeId())) {
        emittingSide = Side.LEFT;
        siblingId = joinMetadata.getRightChildNodeId();
    } else {
        emittingSide = Side.RIGHT;
        siblingId = joinMetadata.getLeftChildNodeId();
    }

    // Collect the first batch of the sibling node's BindingSets. A present
    // rowColumn marks where the follow-up batch job has to resume.
    final Set<VisibilityBindingSet> siblingBindingSets = new HashSet<>();
    final Span siblingSpan = getSpan(tx, childNodeId, childBindingSet, siblingId);
    final Column siblingColumn = getScanColumnFamily(siblingId);
    final Optional<RowColumn> rowColumn = fillSiblingBatch(tx, siblingSpan, siblingColumn, siblingBindingSets, joinMetadata.getJoinBatchSize());

    // Iterates over the resulting BindingSets from the join.
    final Iterator<VisibilityBindingSet> newJoinResults;
    if (emittingSide == Side.LEFT) {
        newJoinResults = joinAlgorithm.newLeftResult(childBindingSet, siblingBindingSets.iterator());
    } else {
        newJoinResults = joinAlgorithm.newRightResult(siblingBindingSets.iterator(), childBindingSet);
    }

    // Insert the new join binding sets to the Fluo table.
    final VariableOrder joinVarOrder = joinMetadata.getVariableOrder();
    while (newJoinResults.hasNext()) {
        final VisibilityBindingSet newJoinResult = newJoinResults.next();

        // Create the Row Key for the emitted binding set. It does not contain visibilities.
        final Bytes resultRow = makeRowKey(joinMetadata.getNodeId(), joinVarOrder, newJoinResult);

        // Only insert the join Binding Set if it is new or BindingSet contains values not used in resultRow.
        if (tx.get(resultRow, FluoQueryColumns.JOIN_BINDING_SET) == null || joinVarOrder.getVariableOrders().size() < newJoinResult.size()) {
            // Create the Node Value. It does contain visibilities.
            final Bytes nodeValueBytes = BS_SERDE.serialize(newJoinResult);

            if (tracing) {
                log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "New Join Result:\n" + newJoinResult + "\n");
            }

            tx.set(resultRow, FluoQueryColumns.JOIN_BINDING_SET, nodeValueBytes);
        }
    }

    // Sibling entries remain: advance the span and register a batch job for the rest.
    if (rowColumn.isPresent()) {
        final Span newSpan = AbstractBatchBindingSetUpdater.getNewSpan(rowColumn.get(), siblingSpan);
        final JoinBatchInformation joinBatch = JoinBatchInformation.builder()
                .setBatchSize(joinMetadata.getJoinBatchSize())
                .setBs(childBindingSet)
                .setColumn(siblingColumn)
                .setJoinType(joinMetadata.getJoinType())
                .setSide(emittingSide)
                .setSpan(newSpan)
                .setTask(Task.Add)
                .build();
        BatchInformationDAO.addBatch(tx, joinMetadata.getNodeId(), joinBatch);
    }
}
use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.
the class JoinBatchInformationTypeAdapter method deserialize.
/**
 * Rebuilds a {@link JoinBatchInformation} from its JSON representation.
 * <p>
 * Reads the members {@code batchSize}, {@code task}, {@code column},
 * {@code span}, {@code startInc}, {@code endInc}, {@code updateVarOrder},
 * {@code bindingSet}, {@code side} and {@code joinType}. The {@code column}
 * and {@code span} members each hold two strings joined by a NUL character
 * (U+0000): the first two parts of {@code column} are passed to the
 * {@link Column} constructor, and the first two parts of {@code span} become
 * the start and end rows of the {@link Span}. Either part may be empty.
 *
 * @param element - JSON object to read from
 * @param typeOfT - target type (unused)
 * @param context - deserialization context (unused)
 * @return the reconstructed batch information
 * @throws JsonParseException if {@code column} or {@code span} does not
 *         contain two NUL-delimited parts
 */
@Override
public JoinBatchInformation deserialize(final JsonElement element, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException {
    final JsonObject json = element.getAsJsonObject();
    final int batchSize = json.get("batchSize").getAsInt();
    final Task task = Task.valueOf(json.get("task").getAsString());

    // A limit of -1 keeps trailing empty strings. The default split drops
    // them, so an empty second part (e.g. an empty end row) used to shrink
    // the array and make index 1 throw ArrayIndexOutOfBoundsException.
    final String[] colArray = json.get("column").getAsString().split("\u0000", -1);
    if (colArray.length < 2) {
        throw new JsonParseException("Expected 'column' to contain two parts separated by \\u0000.");
    }
    final Column column = new Column(colArray[0], colArray[1]);

    final String[] rows = json.get("span").getAsString().split("\u0000", -1);
    if (rows.length < 2) {
        throw new JsonParseException("Expected 'span' to contain a start row and an end row separated by \\u0000.");
    }
    final boolean startInc = json.get("startInc").getAsBoolean();
    final boolean endInc = json.get("endInc").getAsBoolean();
    final Span span = new Span(new RowColumn(rows[0]), startInc, new RowColumn(rows[1]), endInc);

    final VariableOrder updateVarOrder = new VariableOrder(json.get("updateVarOrder").getAsString());
    final VisibilityBindingSet bs = converter.convert(json.get("bindingSet").getAsString(), updateVarOrder);
    final Side side = Side.valueOf(json.get("side").getAsString());
    final JoinType join = JoinType.valueOf(json.get("joinType").getAsString());

    return JoinBatchInformation.builder()
            .setBatchSize(batchSize)
            .setTask(task)
            .setSpan(span)
            .setColumn(column)
            .setBs(bs)
            .setSide(side)
            .setJoinType(join)
            .build();
}
use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.
the class SpanBatchInformationTypeAdapter method deserialize.
/**
 * Rebuilds a {@link SpanBatchDeleteInformation} from its JSON representation.
 * <p>
 * Reads the members {@code batchSize}, {@code column}, {@code span},
 * {@code startInc}, {@code endInc} and {@code nodeId}. The {@code column}
 * and {@code span} members each hold two strings joined by a NUL character
 * (U+0000): the first two parts of {@code column} are passed to the
 * {@link Column} constructor, and the first two parts of {@code span} become
 * the start and end rows of the {@link Span}. Either part may be empty. An
 * empty {@code nodeId} string is mapped to an empty {@link Optional}.
 *
 * @param element - JSON object to read from
 * @param typeOfT - target type (unused)
 * @param context - deserialization context (unused)
 * @return the reconstructed batch information
 * @throws JsonParseException if {@code column} or {@code span} does not
 *         contain two NUL-delimited parts
 */
@Override
public SpanBatchDeleteInformation deserialize(JsonElement element, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
    JsonObject json = element.getAsJsonObject();
    int batchSize = json.get("batchSize").getAsInt();

    // A limit of -1 keeps trailing empty strings. The default split drops
    // them, so an empty second part (e.g. an empty end row) used to shrink
    // the array and make index 1 throw ArrayIndexOutOfBoundsException.
    String[] colArray = json.get("column").getAsString().split("\u0000", -1);
    if (colArray.length < 2) {
        throw new JsonParseException("Expected 'column' to contain two parts separated by \\u0000.");
    }
    Column column = new Column(colArray[0], colArray[1]);

    String[] rows = json.get("span").getAsString().split("\u0000", -1);
    if (rows.length < 2) {
        throw new JsonParseException("Expected 'span' to contain a start row and an end row separated by \\u0000.");
    }
    boolean startInc = json.get("startInc").getAsBoolean();
    boolean endInc = json.get("endInc").getAsBoolean();
    Span span = new Span(new RowColumn(rows[0]), startInc, new RowColumn(rows[1]), endInc);

    // An empty string stands for "no node id".
    String nodeId = json.get("nodeId").getAsString();
    Optional<String> id = nodeId.isEmpty() ? Optional.empty() : Optional.of(nodeId);

    return SpanBatchDeleteInformation.builder()
            .setNodeId(id)
            .setBatchSize(batchSize)
            .setSpan(span)
            .setColumn(column)
            .build();
}
Aggregations