Search in sources :

Example 1 with Task

use of org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task in project incubator-rya by apache.

the class JoinBatchBindingSetUpdater method processBatch.

/**
 * Processes a {@link JoinBatchInformation} job for the join node whose id is
 * encoded in the supplied row.
 * <p>
 * The join implementation is chosen from
 * {@link JoinBatchInformation#getJoinType()}. The sibling BindingSets are
 * collected by {@code fillSiblingBatch} and joined with the batch's own
 * BindingSet on the side reported by {@link JoinBatchInformation#getSide()}.
 * Every joined result is serialized, given a sharded row key, and passed to
 * {@code processTask} along with {@link JoinBatchInformation#getTask()}.
 * <p>
 * When {@code fillSiblingBatch} returns a row/column, the batch's Span is
 * replaced by the Span computed from it and the batch is registered again so
 * that the remaining BindingSets are handled by a follow-up job.
 *
 * @param tx - Fluo Transaction
 * @param row - row from which the id of the join node is extracted
 * @param batch - batch to process; must be a {@link JoinBatchInformation}
 * @throws Exception if the superclass processing or any of the operations
 *             invoked here fails; a RuntimeException is thrown for a join
 *             type other than NATURAL_JOIN or LEFT_OUTER_JOIN
 */
@Override
public void processBatch(final TransactionBase tx, final Bytes row, final BatchInformation batch) throws Exception {
    super.processBatch(tx, row, batch);
    final String nodeId = BatchRowKeyUtil.getNodeId(row);
    Preconditions.checkArgument(batch instanceof JoinBatchInformation);
    final JoinBatchInformation joinInfo = (JoinBatchInformation) batch;
    final Task task = joinInfo.getTask();

    // Select the join implementation that matches the batch's join type.
    final IterativeJoin join;
    switch (joinInfo.getJoinType()) {
        case LEFT_OUTER_JOIN:
            join = new LeftOuterJoin();
            break;
        case NATURAL_JOIN:
            join = new NaturalJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinInfo.getJoinType());
    }

    // Gather the sibling BindingSets; a present result marks where a follow-up batch must resume.
    final Set<VisibilityBindingSet> siblings = new HashSet<>();
    final Optional<RowColumn> resumePoint = fillSiblingBatch(tx, joinInfo, siblings);

    // Join the batch's BindingSet against the siblings on the appropriate side.
    final VisibilityBindingSet batchBs = joinInfo.getBs();
    final boolean leftSide = joinInfo.getSide() == Side.LEFT;
    final Iterator<VisibilityBindingSet> joined = leftSide
            ? join.newLeftResult(batchBs, siblings.iterator())
            : join.newRightResult(siblings.iterator(), batchBs);

    // Serialize each join result and apply the task to its sharded row.
    final JoinMetadata metadata = CACHE.readJoinMetadata(tx, nodeId);
    final VariableOrder varOrder = metadata.getVariableOrder();
    while (joined.hasNext()) {
        final VisibilityBindingSet result = joined.next();
        final Bytes value = BS_SERDE.serialize(result);
        final Bytes shardedRow = BindingHashShardingFunction.addShard(nodeId, varOrder, result);
        processTask(tx, task, shardedRow, FluoQueryColumns.JOIN_BINDING_SET, value);
    }

    // Nothing left over: no follow-up batch is needed.
    if (!resumePoint.isPresent()) {
        return;
    }

    // Narrow the Span to the unprocessed remainder and register the batch again.
    joinInfo.setSpan(getNewSpan(resumePoint.get(), joinInfo.getSpan()));
    BatchInformationDAO.addBatch(tx, nodeId, joinInfo);
}
Also used : Task(org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) RowColumn(org.apache.fluo.api.data.RowColumn) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) IterativeJoin(org.apache.rya.api.function.join.IterativeJoin) JoinMetadata(org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata) Span(org.apache.fluo.api.data.Span) Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) NaturalJoin(org.apache.rya.api.function.join.NaturalJoin) LeftOuterJoin(org.apache.rya.api.function.join.LeftOuterJoin) HashSet(java.util.HashSet)

Example 2 with Task

use of org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task in project incubator-rya by apache.

the class SpanBatchBindingSetUpdater method processBatch.

/**
 * Handles a {@link SpanBatchDeleteInformation} job. Only the Delete task does
 * any work: it delegates to {@code deleteBatch} with the batch's node id,
 * Span, Column and batch size. The Add and Update tasks, as well as any
 * other task value, are only reported through a trace log message.
 * <p>
 * If {@code deleteBatch} returns a row/column, the batch's Span is replaced
 * by the Span computed from it and the batch is registered again under the
 * node id extracted from the row.
 *
 * @param tx - Fluo Transaction
 * @param row - Byte row identifying BatchInformation
 * @param batch - SpanBatchDeleteInformation object to be processed
 * @throws Exception if the superclass processing or the delete operation fails
 */
@Override
public void processBatch(TransactionBase tx, Bytes row, BatchInformation batch) throws Exception {
    super.processBatch(tx, row, batch);
    Preconditions.checkArgument(batch instanceof SpanBatchDeleteInformation);
    SpanBatchDeleteInformation deleteInfo = (SpanBatchDeleteInformation) batch;

    Optional<String> targetNode = deleteInfo.getNodeId();
    Task requestedTask = deleteInfo.getTask();
    int limit = deleteInfo.getBatchSize();
    Span currentSpan = deleteInfo.getSpan();
    Column column = batch.getColumn();

    // Stays empty unless the delete reports that entries remain.
    Optional<RowColumn> resumePoint = Optional.empty();
    switch (requestedTask) {
        case Delete:
            resumePoint = deleteBatch(tx, targetNode, currentSpan, column, limit);
            break;
        case Add:
            log.trace("The Task Add is not supported for SpanBatchBindingSetUpdater.  Batch " + batch + " will not be processed.");
            break;
        case Update:
            log.trace("The Task Update is not supported for SpanBatchBindingSetUpdater.  Batch " + batch + " will not be processed.");
            break;
        default:
            log.trace("Invalid Task type.  Aborting batch operation.");
            break;
    }

    // No leftover entries: the batch is finished.
    if (!resumePoint.isPresent()) {
        return;
    }

    // Register a follow-up batch covering the part of the Span that is still unprocessed.
    Span remaining = getNewSpan(resumePoint.get(), deleteInfo.getSpan());
    log.trace("Batch size met.  There are remaining results that need to be deleted.  Creating a new batch of size: " + deleteInfo.getBatchSize() + " with Span: " + remaining + " and Column: " + column);
    deleteInfo.setSpan(remaining);
    BatchInformationDAO.addBatch(tx, BatchRowKeyUtil.getNodeId(row), deleteInfo);
}
Also used : Task(org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) RowColumn(org.apache.fluo.api.data.RowColumn) Span(org.apache.fluo.api.data.Span)

Example 3 with Task

use of org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task in project incubator-rya by apache.

the class JoinBatchInformationTypeAdapter method deserialize.

/**
 * Rebuilds a {@link JoinBatchInformation} from its JSON form.
 * <p>
 * The JSON object is read for the fields "batchSize", "task", "column",
 * "span", "startInc", "endInc", "updateVarOrder", "bindingSet", "side" and
 * "joinType". The "column" and "span" values are each split on the NUL
 * character into two components: the two arguments passed to the
 * {@link Column} constructor, and the start and end rows of the {@link Span}.
 *
 * @param element - JSON element to read; must be a JSON object
 * @param typeOfT - not used by this implementation
 * @param context - not used by this implementation
 * @return the deserialized JoinBatchInformation
 * @throws JsonParseException if the "column" or "span" value does not contain
 *             the NUL delimiter separating its two components
 */
@Override
public JoinBatchInformation deserialize(final JsonElement element, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException {
    final JsonObject json = element.getAsJsonObject();
    final int batchSize = json.get("batchSize").getAsInt();
    final Task task = Task.valueOf(json.get("task").getAsString());

    // A limit of -1 keeps trailing empty strings, so an empty second component
    // still produces a two-element array instead of being silently dropped.
    final String[] colArray = json.get("column").getAsString().split("\u0000", -1);
    if (colArray.length < 2) {
        throw new JsonParseException("The \"column\" value must contain two NUL-delimited components.");
    }
    final Column column = new Column(colArray[0], colArray[1]);

    // Same reasoning as above: an empty end row must not shrink the array.
    final String[] rows = json.get("span").getAsString().split("\u0000", -1);
    if (rows.length < 2) {
        throw new JsonParseException("The \"span\" value must contain two NUL-delimited rows.");
    }
    final boolean startInc = json.get("startInc").getAsBoolean();
    final boolean endInc = json.get("endInc").getAsBoolean();
    final Span span = new Span(new RowColumn(rows[0]), startInc, new RowColumn(rows[1]), endInc);

    // The variable order is needed to turn the serialized binding set back into an object.
    final VariableOrder updateVarOrder = new VariableOrder(json.get("updateVarOrder").getAsString());
    final VisibilityBindingSet bs = converter.convert(json.get("bindingSet").getAsString(), updateVarOrder);
    final Side side = Side.valueOf(json.get("side").getAsString());
    final JoinType join = JoinType.valueOf(json.get("joinType").getAsString());

    return JoinBatchInformation.builder()
            .setBatchSize(batchSize)
            .setTask(task)
            .setSpan(span)
            .setColumn(column)
            .setBs(bs)
            .setSide(side)
            .setJoinType(join)
            .build();
}
Also used : Task(org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) RowColumn(org.apache.fluo.api.data.RowColumn) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) JsonObject(com.google.gson.JsonObject) JoinType(org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata.JoinType) Span(org.apache.fluo.api.data.Span) Side(org.apache.rya.api.function.join.LazyJoiningIterator.Side) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column)

Aggregations

Column (org.apache.fluo.api.data.Column)3 RowColumn (org.apache.fluo.api.data.RowColumn)3 Span (org.apache.fluo.api.data.Span)3 Task (org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task)3 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)2 VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder)2 JsonObject (com.google.gson.JsonObject)1 HashSet (java.util.HashSet)1 Bytes (org.apache.fluo.api.data.Bytes)1 IterativeJoin (org.apache.rya.api.function.join.IterativeJoin)1 Side (org.apache.rya.api.function.join.LazyJoiningIterator.Side)1 LeftOuterJoin (org.apache.rya.api.function.join.LeftOuterJoin)1 NaturalJoin (org.apache.rya.api.function.join.NaturalJoin)1 JoinMetadata (org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata)1 JoinType (org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata.JoinType)1