Search in sources :

Example 16 with Bytes

use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.

the class JoinResultUpdater method updateJoinResults.

/**
 * Handles a new Binding Set emitted by one of a Join node's children: joins it
 * against a batch of the sibling child's Binding Sets, stores the resulting
 * Binding Sets under the Join node, and registers a follow-up batch job when
 * the sibling scan reported a {@link RowColumn}.
 *
 * @param tx - The transaction all Fluo queries will use. (not null)
 * @param childNodeId - The Node ID of the child whose results received a new Binding Set. (not null)
 * @param childBindingSet - The Binding Set that was just emitted by child node. (not null)
 * @param joinMetadata - The metadata for the Join that has been notified. (not null)
 * @throws Exception The update could not be successfully performed.
 */
public void updateJoinResults(final TransactionBase tx, final String childNodeId, final VisibilityBindingSet childBindingSet, final JoinMetadata joinMetadata) throws Exception {
    checkNotNull(tx);
    checkNotNull(childNodeId);
    checkNotNull(childBindingSet);
    checkNotNull(joinMetadata);

    log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "Join Node ID: " + joinMetadata.getNodeId() + "\n" + "Child Node ID: " + childNodeId + "\n" + "Child Binding Set:\n" + childBindingSet + "\n");

    // Select the join implementation that matches the Join node's type.
    final IterativeJoin joinAlgorithm;
    switch(joinMetadata.getJoinType()) {
        case NATURAL_JOIN:
            joinAlgorithm = new NaturalJoin();
            break;
        case LEFT_OUTER_JOIN:
            joinAlgorithm = new LeftOuterJoin();
            break;
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinMetadata.getJoinType());
    }

    // Any child that is not the left child is treated as the right child.
    final boolean emittedByLeft = childNodeId.equals(joinMetadata.getLeftChildNodeId());
    final Side emittingSide = emittedByLeft ? Side.LEFT : Side.RIGHT;
    final String siblingId = emittedByLeft ? joinMetadata.getRightChildNodeId() : joinMetadata.getLeftChildNodeId();

    // Collect one batch of the sibling's Binding Sets that can join with the new one.
    final Span siblingSpan = getSpan(tx, childNodeId, childBindingSet, siblingId);
    final Column siblingColumn = getScanColumnFamily(siblingId);
    final Set<VisibilityBindingSet> siblingBatch = new HashSet<>();
    final Optional<RowColumn> batchRowColumn = fillSiblingBatch(tx, siblingSpan, siblingColumn, siblingBatch, joinMetadata.getJoinBatchSize());

    // Run the join with the new Binding Set on the side it was emitted from.
    final Iterator<VisibilityBindingSet> joinedResults = emittedByLeft
            ? joinAlgorithm.newLeftResult(childBindingSet, siblingBatch.iterator())
            : joinAlgorithm.newRightResult(siblingBatch.iterator(), childBindingSet);

    // Persist each join result under the Join node.
    final VariableOrder joinVarOrder = joinMetadata.getVariableOrder();
    while (joinedResults.hasNext()) {
        final VisibilityBindingSet joinedResult = joinedResults.next();

        // The Row Key is built from the variable order only. It does not contain visibilities.
        final Bytes rowKey = makeRowKey(joinMetadata.getNodeId(), joinVarOrder, joinedResult);

        // Skip results that are already stored, unless the Binding Set carries
        // more values than the variable order used to build the Row Key.
        final boolean alreadyStored = tx.get(rowKey, FluoQueryColumns.JOIN_BINDING_SET) != null;
        if (alreadyStored && joinVarOrder.getVariableOrders().size() >= joinedResult.size()) {
            continue;
        }

        // The stored value is the serialized Binding Set. It does contain visibilities.
        final Bytes serializedResult = BS_SERDE.serialize(joinedResult);
        log.trace("Transaction ID: " + tx.getStartTimestamp() + "\n" + "New Join Result:\n" + joinedResult + "\n");
        tx.set(rowKey, FluoQueryColumns.JOIN_BINDING_SET, serializedResult);
    }

    // Nothing further to schedule when the sibling scan did not report a RowColumn.
    if (!batchRowColumn.isPresent()) {
        return;
    }

    // Derive the follow-up span from the reported RowColumn and register a batch job for it.
    final Span remainingSpan = AbstractBatchBindingSetUpdater.getNewSpan(batchRowColumn.get(), siblingSpan);
    final JoinBatchInformation followUpBatch = JoinBatchInformation.builder()
            .setBatchSize(joinMetadata.getJoinBatchSize())
            .setBs(childBindingSet)
            .setColumn(siblingColumn)
            .setJoinType(joinMetadata.getJoinType())
            .setSide(emittingSide)
            .setSpan(remainingSpan)
            .setTask(Task.Add)
            .build();
    BatchInformationDAO.addBatch(tx, joinMetadata.getNodeId(), followUpBatch);
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) RowColumn(org.apache.fluo.api.data.RowColumn) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) IterativeJoin(org.apache.rya.api.function.join.IterativeJoin) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) Span(org.apache.fluo.api.data.Span) Side(org.apache.rya.api.function.join.LazyJoiningIterator.Side) Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) NaturalJoin(org.apache.rya.api.function.join.NaturalJoin) LeftOuterJoin(org.apache.rya.api.function.join.LeftOuterJoin) HashSet(java.util.HashSet)

Example 17 with Bytes

use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.

the class BatchIT method simpleJoinDelete.

/**
 * Loads two sets of five statements that share a subject, lets the Fluo app
 * compute the join, then submits a Delete batch task against the join node and
 * checks the per-node counts before and after the batch runs.
 */
@Test
public void simpleJoinDelete() throws Exception {
    final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
    try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
        // Build five statements for each of the two predicates, all on the same subject.
        final RyaURI subject = new RyaURI("urn:subject_1");
        final RyaStatement template1 = new RyaStatement(subject, new RyaURI("urn:predicate_1"), null);
        final RyaStatement template2 = new RyaStatement(subject, new RyaURI("urn:predicate_2"), null);
        final Set<RyaStatement> predicate1Statements = getRyaStatements(template1, 5);
        final Set<RyaStatement> predicate2Statements = getRyaStatements(template2, 5);

        // Create the PCJ table.
        final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
        final String pcjId = pcjStorage.createPcj(sparql);

        // Tell the Fluo app to maintain the PCJ.
        final String queryId = new CreateFluoPcj()
                .withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName())
                .getQueryId();

        // Look up the node IDs; index 2 is used as the join node and index 4 as the right statement pattern.
        final List<String> nodeIds = getNodeIdStrings(fluoClient, queryId);
        final String joinNodeId = nodeIds.get(2);
        final String rightSpNodeId = nodeIds.get(4);

        // The Binding Set that the Delete batch is keyed on.
        final QueryBindingSet bindings = new QueryBindingSet();
        bindings.addBinding("subject", vf.createURI("urn:subject_1"));
        bindings.addBinding("object1", vf.createURI("urn:object_0"));
        final VisibilityBindingSet visibilityBindings = new VisibilityBindingSet(bindings);

        // create sharded span for deletion
        final URI subjectUri = vf.createURI("urn:subject_1");
        final Bytes shardedPrefix = BindingHashShardingFunction.getShardedScanPrefix(rightSpNodeId, subjectUri);
        final Span deleteSpan = Span.prefix(shardedPrefix);

        // Stream the data into Fluo and wait for the observers to finish.
        final InsertTriples inserter = new InsertTriples();
        inserter.insert(fluoClient, predicate1Statements, Optional.absent());
        inserter.insert(fluoClient, predicate2Statements, Optional.absent());
        getMiniFluo().waitForObservers();

        // Counts before the Delete batch is applied.
        verifyCounts(fluoClient, nodeIds, Arrays.asList(25, 25, 25, 5, 5));

        // Describe the Delete batch and submit it to the join node.
        final JoinBatchInformation deleteBatch = JoinBatchInformation.builder()
                .setBatchSize(1)
                .setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET)
                .setSpan(deleteSpan)
                .setTask(Task.Delete)
                .setJoinType(JoinType.NATURAL_JOIN)
                .setSide(Side.LEFT)
                .setBs(visibilityBindings)
                .build();
        createSpanBatch(fluoClient, joinNodeId, deleteBatch);
        getMiniFluo().waitForObservers();

        // Verify the end results of the query match the expected results.
        verifyCounts(fluoClient, nodeIds, Arrays.asList(25, 25, 20, 5, 5));
    }
}
Also used : FluoClient(org.apache.fluo.api.client.FluoClient) FluoClientImpl(org.apache.fluo.core.client.FluoClientImpl) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) AccumuloPcjStorage(org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjStorage) InsertTriples(org.apache.rya.indexing.pcj.fluo.api.InsertTriples) RyaStatement(org.apache.rya.api.domain.RyaStatement) CreateFluoPcj(org.apache.rya.indexing.pcj.fluo.api.CreateFluoPcj) JoinBatchInformation(org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation) URI(org.openrdf.model.URI) RyaURI(org.apache.rya.api.domain.RyaURI) Span(org.apache.fluo.api.data.Span) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) RyaURI(org.apache.rya.api.domain.RyaURI) Bytes(org.apache.fluo.api.data.Bytes) PrecomputedJoinStorage(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage) Test(org.junit.Test)

Example 18 with Bytes

use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.

the class TripleObserver method process.

/**
 * Matches a triple against every known Statement Pattern and emits a Binding
 * Set for each pattern it matches, then deletes the triple's notification cell.
 *
 * @param tx - The transaction used for all reads and writes.
 * @param brow - The row that holds the serialized triple.
 * @param column - The column of {@code brow} that is deleted once processing is done.
 */
@Override
public void process(final TransactionBase tx, final Bytes brow, final Column column) {
    // Get string representation of triple.
    final RyaStatement ryaStatement = IncUpdateDAO.deserializeTriple(brow);
    log.trace("Transaction ID: {}\nRya Statement: {}\n", tx.getStartTimestamp(), ryaStatement);
    log.trace("Beginning to process triple.");
    final String triple = IncUpdateDAO.getTripleString(ryaStatement);
    final Set<String> spIDs = SP_ID_CACHE.getStatementPatternIds(tx);
    // See if the triple matches the conditions of any of the Statement Patterns.
    for (final String spID : spIDs) {
        // Fetch its metadata.
        final StatementPatternMetadata spMetadata = QUERY_METADATA_DAO.readStatementPatternMetadata(tx, spID);
        log.trace("Retrieved metadata: {}", spMetadata);
        // Attempt to match the triple against the pattern.
        final String pattern = spMetadata.getStatementPattern();
        final VariableOrder varOrder = spMetadata.getVariableOrder();
        final String bindingSetString = getBindingSet(triple, pattern, varOrder);
        log.trace("Created binding set match string: {}", bindingSetString);
        // An empty string means the triple did not match this pattern.
        if (!bindingSetString.isEmpty()) {
            // Fetch the triple's visibility label, defaulting to the empty label.
            final String visibility = tx.gets(brow.toString(), FluoQueryColumns.TRIPLES, "");
            // Make BindingSet and sharded row
            final VisibilityBindingSet visBindingSet = VIS_BS_CONVERTER.convert(bindingSetString, varOrder);
            visBindingSet.setVisibility(visibility);
            final Bytes row = BindingHashShardingFunction.addShard(spID, varOrder, visBindingSet);
            // If this is a new Binding Set, then emit it.
            if (tx.get(row, FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET) == null) {
                try {
                    final Bytes valueBytes = BS_SERDE.serialize(visBindingSet);
                    log.trace("Transaction ID: {}\nMatched Statement Pattern: {}\nBinding Set: {}\n", tx.getStartTimestamp(), spID, visBindingSet);
                    tx.set(row, FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET, valueBytes);
                } catch (final Exception e) {
                    // NOTE(review): this also catches failures from tx.set(), not only
                    // serialization failures - confirm that skipping is intended for both.
                    log.error("Couldn't serialize a Binding Set. This value will be skipped.", e);
                }
            }
        }
    }
    // Once the triple has been handled, it may be deleted.
    tx.delete(brow, column);
}
Also used : Bytes(org.apache.fluo.api.data.Bytes) VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) StatementPatternMetadata(org.apache.rya.indexing.pcj.fluo.app.query.StatementPatternMetadata) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) RyaStatement(org.apache.rya.api.domain.RyaStatement)

Example 19 with Bytes

use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.

the class FluoQueryMetadataDAO method readAggregationMetadataBuilder.

/**
 * Reads the metadata of an Aggregation node from the Fluo table and returns a
 * builder that has been populated with it.
 *
 * @param sx - The snapshot used to read from the Fluo table. (not null)
 * @param nodeId - The Node ID of the Aggregation node; it is also used as the row ID. (not null)
 * @return A builder holding the variable order, parent/child node IDs, Group By
 *   variable order and every deserialized {@link AggregationElement}.
 * @throws NullPointerException No aggregations cell is stored for {@code nodeId}.
 * @throws RuntimeException The stored aggregations could not be deserialized.
 */
private AggregationMetadata.Builder readAggregationMetadataBuilder(final SnapshotBase sx, final String nodeId) {
    requireNonNull(sx);
    requireNonNull(nodeId);
    // Fetch the values from the Fluo table.
    final String rowId = nodeId;
    final Map<Column, String> values = sx.gets(rowId, FluoQueryColumns.AGGREGATION_VARIABLE_ORDER, FluoQueryColumns.AGGREGATION_PARENT_NODE_ID, FluoQueryColumns.AGGREGATION_CHILD_NODE_ID, FluoQueryColumns.AGGREGATION_GROUP_BY_BINDING_NAMES);
    // Return an object holding them.
    final String varOrderString = values.get(FluoQueryColumns.AGGREGATION_VARIABLE_ORDER);
    final VariableOrder varOrder = new VariableOrder(varOrderString);
    final String parentNodeId = values.get(FluoQueryColumns.AGGREGATION_PARENT_NODE_ID);
    final String childNodeId = values.get(FluoQueryColumns.AGGREGATION_CHILD_NODE_ID);
    // Read the Group By variable order if one was present. A missing column is
    // treated the same as an empty one instead of failing with a NullPointerException.
    final String groupByString = values.get(FluoQueryColumns.AGGREGATION_GROUP_BY_BINDING_NAMES);
    final boolean hasGroupBy = groupByString != null && !groupByString.isEmpty();
    final VariableOrder groupByVars = hasGroupBy ? new VariableOrder(groupByString.split(";")) : new VariableOrder();
    // Fetch the serialized collection of AggregationElements. Fail with a
    // descriptive message if the cell is missing rather than inside the stream setup.
    final Bytes aggBytes = sx.get(Bytes.of(nodeId.getBytes(Charsets.UTF_8)), FluoQueryColumns.AGGREGATION_AGGREGATIONS);
    requireNonNull(aggBytes, "No aggregations are stored in the Fluo table for Aggregation node: " + nodeId);
    // Deserialize the collection of AggregationElements.
    //
    // This is how you find classes that you missed in the vois.accept() list, below.
    // Construct the stream as an anonymous subclass with:
    //   { @Override protected void invalidClassNameFound(String className) throws java.io.InvalidClassException {
    //   System.out.println("vois.accept(" + className + ".class, ");};};
    final Collection<AggregationElement> aggregations;
    try (final ValidatingObjectInputStream vois = new ValidatingObjectInputStream(aggBytes.toInputStream())) {
        // These classes are allowed to be deserialized. Others throw InvalidClassException.
        vois.accept(java.util.ArrayList.class, java.lang.Enum.class, AggregationElement.class, AggregationType.class);
        final Object object = vois.readObject();
        if (!(object instanceof Collection<?>)) {
            throw new InvalidClassException("Object read was not of type Collection. It was: " + object.getClass());
        }
        // The element type cannot be checked at runtime because of erasure; the
        // accept() whitelist above limits which classes can appear in the stream.
        @SuppressWarnings("unchecked")
        final Collection<AggregationElement> deserialized = (Collection<AggregationElement>) object;
        aggregations = deserialized;
    } catch (final IOException | ClassNotFoundException e) {
        throw new RuntimeException("Problem encountered while reading AggregationMetadata from the Fluo table. Unable " + "to deserialize the AggregationElements from a byte[].", e);
    }
    final AggregationMetadata.Builder builder = AggregationMetadata.builder(nodeId).setVarOrder(varOrder).setParentNodeId(parentNodeId).setChildNodeId(childNodeId).setGroupByVariableOrder(groupByVars);
    for (final AggregationElement aggregation : aggregations) {
        builder.addAggregation(aggregation);
    }
    return builder;
}
Also used : AggregationElement(org.apache.rya.api.function.aggregation.AggregationElement) VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) InvalidClassException(java.io.InvalidClassException) ValidatingObjectInputStream(org.apache.commons.io.serialization.ValidatingObjectInputStream) IOException(java.io.IOException) Bytes(org.apache.fluo.api.data.Bytes) Column(org.apache.fluo.api.data.Column) Collection(java.util.Collection)

Example 20 with Bytes

use of org.apache.fluo.api.data.Bytes in project incubator-rya by apache.

the class PeriodicNotificationBinPrunerIT method compareFluoCounts.

/**
 * Asserts that no results remain in Fluo for the given periodic bin under any
 * of the node IDs collected for the query.
 *
 * @param client - The client used to open a snapshot of the Fluo table.
 * @param pcjId - The PCJ ID the Fluo query ID is generated from.
 * @param bin - The periodic bin ID whose results are expected to be gone.
 */
private void compareFluoCounts(FluoClient client, String pcjId, long bin) {
    // Row keys for the bin are prefixed by the node ID followed by the bin ID binding.
    final QueryBindingSet bs = new QueryBindingSet();
    bs.addBinding(IncrementalUpdateConstants.PERIODIC_BIN_ID, new LiteralImpl(Long.toString(bin), XMLSchema.LONG));
    final VariableOrder varOrder = new VariableOrder(IncrementalUpdateConstants.PERIODIC_BIN_ID);
    try (Snapshot sx = client.newSnapshot()) {
        final String fluoQueryId = NodeType.generateNewIdForType(NodeType.QUERY, pcjId);
        final Set<String> ids = new HashSet<>();
        PeriodicQueryUtil.getPeriodicQueryNodeAncestorIds(sx, fluoQueryId, ids);
        for (final String id : ids) {
            final NodeType optNode = NodeType.fromNodeId(id).orNull();
            if (optNode == null) {
                throw new RuntimeException("Invalid NodeType.");
            }
            final Bytes prefix = RowKeyUtil.makeRowKey(id, varOrder, bs);
            final RowScanner scanner = sx.scanner().fetch(optNode.getResultColumn()).over(Span.prefix(prefix)).byRow().build();
            // Count every value stored under the prefix in the node's result column.
            int count = 0;
            for (final ColumnScanner colScanner : scanner) {
                for (final ColumnValue ignored : colScanner) {
                    count++;
                }
            }
            Assert.assertEquals(0, count);
        }
    }
}
Also used : VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) LiteralImpl(org.openrdf.model.impl.LiteralImpl) Snapshot(org.apache.fluo.api.client.Snapshot) Bytes(org.apache.fluo.api.data.Bytes) NodeType(org.apache.rya.indexing.pcj.fluo.app.NodeType) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue) HashSet(java.util.HashSet)

Aggregations

Bytes (org.apache.fluo.api.data.Bytes): 43; VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet): 16; VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder): 14; Column (org.apache.fluo.api.data.Column): 9; Test (org.junit.Test): 9; ColumnScanner (org.apache.fluo.api.client.scanner.ColumnScanner): 8; RowScanner (org.apache.fluo.api.client.scanner.RowScanner): 8; NodeType (org.apache.rya.indexing.pcj.fluo.app.NodeType): 8; QueryBindingSet (org.openrdf.query.algebra.evaluation.QueryBindingSet): 6; FluoClient (org.apache.fluo.api.client.FluoClient): 5; ColumnValue (org.apache.fluo.api.data.ColumnValue): 5; RowColumn (org.apache.fluo.api.data.RowColumn): 5; Span (org.apache.fluo.api.data.Span): 5; Transaction (org.apache.fluo.api.client.Transaction): 4; RyaStatement (org.apache.rya.api.domain.RyaStatement): 4; HashSet (java.util.HashSet): 3; Snapshot (org.apache.fluo.api.client.Snapshot): 3; RyaURI (org.apache.rya.api.domain.RyaURI): 3; JoinBatchInformation (org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation): 3; SpanBatchDeleteInformation (org.apache.rya.indexing.pcj.fluo.app.batch.SpanBatchDeleteInformation): 3