Search in sources :

Example 1 with Span

use of org.apache.fluo.api.data.Span in project incubator-rya by apache.

the class JoinBatchBindingSetUpdater method processBatch.

/**
 * Applies a {@link JoinBatchInformation} job to the join node whose id is
 * extracted from {@code row}.
 * <p>
 * The batch's join type selects either a {@link NaturalJoin} or a
 * {@link LeftOuterJoin}. Sibling binding sets are collected through
 * {@code fillSiblingBatch}, joined against the binding set carried by the
 * batch, and every joined result is serialized, given a sharded row key and
 * handed to {@code processTask} together with the batch's
 * {@link JoinBatchInformation#getTask() task}.
 * <p>
 * When {@code fillSiblingBatch} returns a {@link RowColumn} (presumably the
 * first entry left unprocessed once {@link JoinBatchInformation#getBatchSize()}
 * was reached), the batch's span is replaced with the one computed by
 * {@code getNewSpan} and the batch is registered again so that the remaining
 * entries are handled by a follow-up job.
 *
 * @param tx - Fluo transaction used for all reads and writes
 * @param row - row of the batch entry; the join node id is derived from it
 * @param batch - must be a {@link JoinBatchInformation} (enforced with {@code Preconditions.checkArgument})
 * @throws Exception if the parent implementation or any of the helpers invoked here fails
 */
@Override
public void processBatch(final TransactionBase tx, final Bytes row, final BatchInformation batch) throws Exception {
    super.processBatch(tx, row, batch);

    final String nodeId = BatchRowKeyUtil.getNodeId(row);
    Preconditions.checkArgument(batch instanceof JoinBatchInformation);
    final JoinBatchInformation joinInfo = (JoinBatchInformation) batch;
    final Task batchTask = joinInfo.getTask();

    // Pick the join implementation that matches the batch's join type.
    final IterativeJoin join;
    switch (joinInfo.getJoinType()) {
        case NATURAL_JOIN:
            join = new NaturalJoin();
            break;
        case LEFT_OUTER_JOIN:
            join = new LeftOuterJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinInfo.getJoinType());
    }

    // Collect the sibling binding sets; a present result marks where a follow-up batch must resume.
    final Set<VisibilityBindingSet> siblingResults = new HashSet<>();
    final Optional<RowColumn> resumePoint = fillSiblingBatch(tx, joinInfo, siblingResults);

    // Join the batch's binding set against the siblings from the side recorded in the batch.
    final VisibilityBindingSet batchBs = joinInfo.getBs();
    final boolean fromLeft = joinInfo.getSide() == Side.LEFT;
    final Iterator<VisibilityBindingSet> joined = fromLeft
            ? join.newLeftResult(batchBs, siblingResults.iterator())
            : join.newRightResult(siblingResults.iterator(), batchBs);

    // The join's variable order is needed to build the sharded row key of each result.
    final JoinMetadata metadata = CACHE.readJoinMetadata(tx, nodeId);
    final VariableOrder varOrder = metadata.getVariableOrder();
    while (joined.hasNext()) {
        final VisibilityBindingSet result = joined.next();
        final Bytes serialized = BS_SERDE.serialize(result);
        final Bytes shardedRow = BindingHashShardingFunction.addShard(nodeId, varOrder, result);
        processTask(tx, batchTask, shardedRow, FluoQueryColumns.JOIN_BINDING_SET, serialized);
    }

    // Nothing left over: no follow-up batch is needed.
    if (!resumePoint.isPresent()) {
        return;
    }

    // Narrow the span to the unprocessed remainder and register the batch again.
    final Span remaining = getNewSpan(resumePoint.get(), joinInfo.getSpan());
    joinInfo.setSpan(remaining);
    BatchInformationDAO.addBatch(tx, nodeId, joinInfo);
}
Also used : Task(org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) RowColumn(org.apache.fluo.api.data.RowColumn) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) IterativeJoin(org.apache.rya.api.function.join.IterativeJoin) JoinMetadata(org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata) Span(org.apache.fluo.api.data.Span) Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) NaturalJoin(org.apache.rya.api.function.join.NaturalJoin) LeftOuterJoin(org.apache.rya.api.function.join.LeftOuterJoin) HashSet(java.util.HashSet)

Example 2 with Span

use of org.apache.fluo.api.data.Span in project incubator-rya by apache.

the class SpanBatchBindingSetUpdater method processBatch.

/**
 * Processes a {@link SpanBatchDeleteInformation} job. Only the {@code Delete}
 * task does any work: it delegates to {@code deleteBatch} with the batch's
 * node id, span, column and batch size. {@code Add}, {@code Update} and any
 * other task are merely reported through a trace log message.
 * <p>
 * If {@code deleteBatch} returns a {@link RowColumn}, the batch's span is
 * replaced with the one computed by {@code getNewSpan} and the batch is
 * registered again so the remaining entries are deleted by a follow-up job.
 *
 * @param tx - Fluo Transaction
 * @param row - Byte row identifying BatchInformation
 * @param batch - SpanBatchDeleteInformation object to be processed
 * @throws Exception if the parent implementation or any of the helpers invoked here fails
 */
@Override
public void processBatch(TransactionBase tx, Bytes row, BatchInformation batch) throws Exception {
    super.processBatch(tx, row, batch);
    Preconditions.checkArgument(batch instanceof SpanBatchDeleteInformation);

    final SpanBatchDeleteInformation deleteInfo = (SpanBatchDeleteInformation) batch;
    final Optional<String> nodeId = deleteInfo.getNodeId();
    final Task task = deleteInfo.getTask();
    final int batchSize = deleteInfo.getBatchSize();
    final Span span = deleteInfo.getSpan();
    final Column column = batch.getColumn();

    // Stays empty unless the delete reports a point from which a follow-up batch must resume.
    Optional<RowColumn> resumePoint = Optional.empty();
    switch (task) {
        case Delete:
            resumePoint = deleteBatch(tx, nodeId, span, column, batchSize);
            break;
        case Add:
            log.trace("The Task Add is not supported for SpanBatchBindingSetUpdater.  Batch "
                    + batch + " will not be processed.");
            break;
        case Update:
            log.trace("The Task Update is not supported for SpanBatchBindingSetUpdater.  Batch "
                    + batch + " will not be processed.");
            break;
        default:
            log.trace("Invalid Task type.  Aborting batch operation.");
            break;
    }

    // Nothing left over: no follow-up batch is needed.
    if (!resumePoint.isPresent()) {
        return;
    }

    // Narrow the span to the remainder and register the batch again under the node id taken from the row.
    final Span remaining = getNewSpan(resumePoint.get(), deleteInfo.getSpan());
    log.trace("Batch size met.  There are remaining results that need to be deleted.  Creating a new batch of size: "
            + deleteInfo.getBatchSize() + " with Span: " + remaining + " and Column: " + column);
    deleteInfo.setSpan(remaining);
    BatchInformationDAO.addBatch(tx, BatchRowKeyUtil.getNodeId(row), deleteInfo);
}
Also used : Task(org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) RowColumn(org.apache.fluo.api.data.RowColumn) Span(org.apache.fluo.api.data.Span)

Example 3 with Span

use of org.apache.fluo.api.data.Span in project incubator-rya by apache.

the class BatchIT method simpleJoinAdd.

/**
 * Inserts five statements for {@code <urn:predicate_2>} only, then registers
 * an {@code Add} join batch (batch size 1, left side) on the join node and
 * checks the per-node result counts before and after the batch is processed.
 */
@Test
public void simpleJoinAdd() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";

    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        // Template statement with a null object, expanded into five statements by the helper.
        final RyaStatement rightTemplate =
                new RyaStatement(new RyaURI("urn:subject_1"), new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> rightStatements = getRyaStatements(rightTemplate, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage =
                new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);

        // Tell the Fluo app to maintain the PCJ.
        final CreateFluoPcj createPcj = new CreateFluoPcj();
        final String queryId = createPcj
                .withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();

        // Index 2 is used as the join node and index 4 as its right statement pattern.
        final List<String> ids = getNodeIdStrings(fluoClient, queryId);
        final String joinId = ids.get(2);
        final String rightSp = ids.get(4);

        // Binding set handed to the batch as the left-side input of the join.
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", vf.createURI("urn:subject_1"));
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet leftVisBs = new VisibilityBindingSet(leftBindings);

        // Sharded prefix span over the right statement pattern's rows for the subject.
        final URI subjectUri = vf.createURI("urn:subject_1");
        final Bytes scanPrefix = BindingHashShardingFunction.getShardedScanPrefix(rightSp, subjectUri);
        final Span span = Span.prefix(scanPrefix);

        // Stream the data into Fluo.
        final InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, rightStatements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, ids, Arrays.asList(0, 0, 0, 0, 5));

        // Register the Add batch on the join node.
        final JoinBatchInformation batch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(span)
                .setTask(Task.Add)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(leftVisBs)
                .build();
        createSpanBatch(fluoClient, joinId, batch);
        getMiniFluo().waitForObservers();

        // Verify the end results of the query match the expected results.
        verifyCounts(fluoClient, ids, Arrays.asList(5, 5, 5, 0, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 4 with Span

use of org.apache.fluo.api.data.Span in project incubator-rya by apache.

the class JoinResultUpdater method updateJoinResults.

/**
 * Recomputes the results of a Join node after one of its children has emitted
 * a new Binding Set.
 * <p>
 * The new Binding Set is joined against the sibling's Binding Sets gathered by
 * {@code fillSiblingBatch}. Each join result is written to the
 * {@code JOIN_BINDING_SET} column unless a value already exists for its row
 * and the result carries no more bindings than the join's variable order.
 * If {@code fillSiblingBatch} returns a {@link RowColumn}, a
 * {@link JoinBatchInformation} with task {@code Add} is registered for the
 * join node, using the span computed by {@code getNewSpan}.
 *
 * @param tx - The transaction all Fluo queries will use. (not null)
 * @param childNodeId - The Node ID of the child whose results received a new Binding Set. (not null)
 * @param childBindingSet - The Binding Set that was just emitted by child node. (not null)
 * @param joinMetadata - The metadata for the Join that has been notified. (not null)
 * @throws Exception The update could not be successfully performed.
 */
public void updateJoinResults(final TransactionBase tx, final String childNodeId, final VisibilityBindingSet childBindingSet, final JoinMetadata joinMetadata) throws Exception {
    checkNotNull(tx);
    checkNotNull(childNodeId);
    checkNotNull(childBindingSet);
    checkNotNull(joinMetadata);

    log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n"
            + "Join Node ID: " + joinMetadata.getNodeId() + "\n"
            + "Child Node ID: " + childNodeId + "\n"
            + "Child Binding Set:\n" + childBindingSet + "\n");

    // Pick the join implementation that matches the join's type.
    final IterativeJoin join;
    switch (joinMetadata.getJoinType()) {
        case NATURAL_JOIN:
            join = new NaturalJoin();
            break;
        case LEFT_OUTER_JOIN:
            join = new LeftOuterJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinMetadata.getJoinType());
    }

    // Any child that is not the left child is treated as the right child.
    final boolean childIsLeft = childNodeId.equals(joinMetadata.getLeftChildNodeId());
    final Side emittingSide = childIsLeft ? Side.LEFT : Side.RIGHT;
    final String siblingId = childIsLeft
            ? joinMetadata.getRightChildNodeId()
            : joinMetadata.getLeftChildNodeId();

    // Collect the sibling's binding sets; a present result marks where a follow-up batch must resume.
    final Set<VisibilityBindingSet> siblingResults = new HashSet<>();
    final Span siblingSpan = getSpan(tx, childNodeId, childBindingSet, siblingId);
    final Column siblingColumn = getScanColumnFamily(siblingId);
    final Optional<RowColumn> resumePoint =
            fillSiblingBatch(tx, siblingSpan, siblingColumn, siblingResults, joinMetadata.getJoinBatchSize());

    // Join the child's binding set against the siblings from the emitting side.
    final Iterator<VisibilityBindingSet> joined = (emittingSide == Side.LEFT)
            ? join.newLeftResult(childBindingSet, siblingResults.iterator())
            : join.newRightResult(siblingResults.iterator(), childBindingSet);

    // Insert the new join binding sets to the Fluo table.
    final VariableOrder varOrder = joinMetadata.getVariableOrder();
    while (joined.hasNext()) {
        final VisibilityBindingSet result = joined.next();

        // Create the Row Key for the emitted binding set. It does not contain visibilities.
        final Bytes resultRow = makeRowKey(joinMetadata.getNodeId(), varOrder, result);

        // Skip results that already exist and carry no values beyond those used in the row key.
        if (tx.get(resultRow, FluoQueryColumns.JOIN_BINDING_SET) != null
                && varOrder.getVariableOrders().size() >= result.size()) {
            continue;
        }

        // Create the Node Value. It does contain visibilities.
        final Bytes nodeValue = BS_SERDE.serialize(result);
        log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n"
                + "New Join Result:\n" + result + "\n");
        tx.set(resultRow, FluoQueryColumns.JOIN_BINDING_SET, nodeValue);
    }

    // Nothing left over: no follow-up batch is needed.
    if (!resumePoint.isPresent()) {
        return;
    }

    // Register a batch job on the join node that continues from the remaining span.
    final Span remaining = AbstractBatchBindingSetUpdater.getNewSpan(resumePoint.get(), siblingSpan);
    final JoinBatchInformation followUp = JoinBatchInformation.builder()
            .setBatchSize(joinMetadata.getJoinBatchSize())
            .setBs(childBindingSet)
            .setColumn(siblingColumn)
            .setJoinType(joinMetadata.getJoinType())
            .setSide(emittingSide)
            .setSpan(remaining)
            .setTask(Task.Add)
            .build();
    BatchInformationDAO.addBatch(tx, joinMetadata.getNodeId(), followUp);
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) RowColumn(org.apache.fluo.api.data.RowColumn) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) IterativeJoin(org.apache.rya.api.function.join.IterativeJoin) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) Span(org.apache.fluo.api.data.Span) Side(org.apache.rya.api.function.join.LazyJoiningIterator.Side) Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) NaturalJoin(org.apache.rya.api.function.join.NaturalJoin) LeftOuterJoin(org.apache.rya.api.function.join.LeftOuterJoin) HashSet(java.util.HashSet)

Example 5 with Span

use of org.apache.fluo.api.data.Span in project incubator-rya by apache.

the class BatchIT method simpleJoinDelete.

/**
 * Inserts five statements for each of {@code <urn:predicate_1>} and
 * {@code <urn:predicate_2>}, then registers a {@code Delete} join batch
 * (batch size 1, left side) on the join node and checks the per-node result
 * counts before and after the batch is processed.
 */
@Test
public void simpleJoinDelete() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; "
            + " <urn:predicate_2> ?object2 } ";

    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        // Template statements with a null object, each expanded into five statements by the helper.
        final RyaURI subject = new RyaURI("urn:subject_1");
        final RyaStatement leftTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_1"), null);
        final RyaStatement rightTemplate = new RyaStatement(subject, new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> leftStatements = getRyaStatements(leftTemplate, 5);
        final Set<RyaStatement> rightStatements = getRyaStatements(rightTemplate, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage =
                new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);

        // Tell the Fluo app to maintain the PCJ.
        final CreateFluoPcj createPcj = new CreateFluoPcj();
        final String queryId = createPcj
                .withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();

        // Index 2 is used as the join node and index 4 as its right statement pattern.
        final List<String> ids = getNodeIdStrings(fluoClient, queryId);
        final String joinId = ids.get(2);
        final String rightSp = ids.get(4);

        // Binding set handed to the batch as the left-side input of the join.
        final QueryBindingSet leftBindings = new QueryBindingSet();
        leftBindings.addBinding("subject", vf.createURI("urn:subject_1"));
        leftBindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet leftVisBs = new VisibilityBindingSet(leftBindings);

        // create sharded span for deletion
        final URI subjectUri = vf.createURI("urn:subject_1");
        final Bytes scanPrefix = BindingHashShardingFunction.getShardedScanPrefix(rightSp, subjectUri);
        final Span span = Span.prefix(scanPrefix);

        // Stream the data into Fluo.
        final InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, leftStatements, Optional.absent());
        inserter.insert(fluoClient, rightStatements, Optional.absent());
        getMiniFluo().waitForObservers();
        verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 25, 5, 5));

        // Register the Delete batch on the join node.
        final JoinBatchInformation batch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(span)
                .setTask(Task.Delete)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(leftVisBs)
                .build();
        createSpanBatch(fluoClient, joinId, batch);
        getMiniFluo().waitForObservers();

        // Verify the end results of the query match the expected results.
        verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 20, 5, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Aggregations

Span (org.apache.fluo.api.data.Span): 10; Column (org.apache.fluo.api.data.Column): 8; RowColumn (org.apache.fluo.api.data.RowColumn): 8; Bytes (org.apache.fluo.api.data.Bytes): 5; VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet): 5; JsonObject (com.google.gson.JsonObject): 4; VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder): 4; Task (org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task): 3; JoinBatchInformation (org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation): 3; JsonPrimitive (com.google.gson.JsonPrimitive): 2; HashSet (java.util.HashSet): 2; FluoClient (org.apache.fluo.api.client.FluoClient): 2; FluoClientImpl (org.apache.fluo.core.client.FluoClientImpl): 2; RyaStatement (org.apache.rya.api.domain.RyaStatement): 2; RyaURI (org.apache.rya.api.domain.RyaURI): 2; IterativeJoin (org.apache.rya.api.function.join.IterativeJoin): 2; Side (org.apache.rya.api.function.join.LazyJoiningIterator.Side): 2; LeftOuterJoin (org.apache.rya.api.function.join.LeftOuterJoin): 2; NaturalJoin (org.apache.rya.api.function.join.NaturalJoin): 2; CreateFluoPcj (org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj): 2