Search in sources :

Example 6 with RowColumn

use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.

the class JoinBatchBindingSetUpdater method fillSiblingBatch.

/**
 * Reads sibling binding sets for a join batch. The scan covers the Span and
 * Column carried by the {@link JoinBatchInformation}; every value read is
 * deserialized and added to {@code bsSet} until the set holds at least the
 * batch size given by the JoinBatchInformation.
 * <p>
 * The limit is checked against {@code bsSet.size()} before each value is
 * read, so entries already present in the set count toward the limit and
 * the value at which the scan stops is left unread.
 *
 * @param tx - Fluo transaction used to perform the scan
 * @param batch - supplies the Span, Column and batch size for the scan
 * @param bsSet - set that the deserialized results are added to
 * @return Optional RowColumn made of the row being scanned when the limit
 *         was reached and the batch Column; empty if the scan finished
 *         without reaching the limit
 * @throws Exception - declared by this method; presumably raised while
 *         deserializing a value (the serializer is not shown here)
 */
private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final JoinBatchInformation batch, final Set<VisibilityBindingSet> bsSet) throws Exception {
    final Span span = batch.getSpan();
    final Column column = batch.getColumn();
    final int limit = batch.getBatchSize();
    final Iterator<ColumnScanner> rowIter = tx.scanner().over(span).fetch(column).byRow().build().iterator();
    // placeholder only: it is always overwritten before the limit can be reached
    Bytes resumeRow = span.getStart().getRow();
    boolean full = false;
    while (rowIter.hasNext() && !full) {
        final ColumnScanner rowEntries = rowIter.next();
        resumeRow = rowEntries.getRow();
        final Iterator<ColumnValue> values = rowEntries.iterator();
        while (values.hasNext()) {
            if (bsSet.size() < limit) {
                bsSet.add(BS_SERDE.deserialize(values.next().getValue()));
            } else {
                // set is full and this row still has an unread value
                full = true;
                break;
            }
        }
    }
    if (!full) {
        return Optional.empty();
    }
    return Optional.of(new RowColumn(resumeRow, column));
}
Also used : Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) RowColumn(org.apache.fluo.api.data.RowColumn) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue) Span(org.apache.fluo.api.data.Span)

Example 7 with RowColumn

use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.

the class SpanBatchBindingSetUpdater method deleteBatch.

/**
 * Deletes at most {@code batchSize} entries found by scanning the given
 * Span for the given Column.
 * <p>
 * When a nodeId is supplied, the node id is extracted from each scanned row
 * and the row's entries are deleted only if that id equals the supplied
 * nodeId. When no nodeId is supplied, every scanned row is eligible.
 * <p>
 * Failures are handled on a best-effort basis: any exception raised while
 * scanning or deleting is logged and an empty Optional is returned.
 *
 * @param tx - Fluo transaction in which the scan and deletes are performed
 * @param nodeId - optional node id that scanned rows must match
 * @param span - Span to scan over
 * @param column - Column to fetch during the scan
 * @param batchSize - maximum number of entries to delete in this call
 * @return Optional RowColumn holding the row being scanned when the batch
 *         limit was reached; empty if the scan finished without reaching
 *         the limit or if an exception occurred
 */
private Optional<RowColumn> deleteBatch(TransactionBase tx, Optional<String> nodeId, Span span, Column column, int batchSize) {
    log.trace("Deleting batch of size: " + batchSize + " using Span: " + span + " and Column: " + column);
    RowScanner rs = tx.scanner().over(span).fetch(column).byRow().build();
    try {
        Iterator<ColumnScanner> colScannerIter = rs.iterator();
        int count = 0;
        boolean batchLimitMet = false;
        Bytes row = span.getStart().getRow();
        // get prefix if nodeId is specified
        Optional<Bytes> prefixBytes = Optional.empty();
        if (nodeId.isPresent()) {
            NodeType type = NodeType.fromNodeId(nodeId.get()).get();
            prefixBytes = Optional.ofNullable(Bytes.of(type.getNodeTypePrefix()));
        }
        while (colScannerIter.hasNext() && !batchLimitMet) {
            ColumnScanner colScanner = colScannerIter.next();
            row = colScanner.getRow();
            // extract the nodeId from the returned row if a nodeId was passed
            // into the SpanBatchInformation.  This is to ensure that the returned
            // row nodeId is equal to the nodeId passed in to the span batch information
            Optional<String> rowNodeId = Optional.empty();
            if (prefixBytes.isPresent()) {
                rowNodeId = Optional.of(BindingSetRow.makeFromShardedRow(prefixBytes.get(), row).getNodeId());
            }
            // delete only when there is no row nodeId to check (no nodeId was
            // passed in) or when the row's nodeId equals the nodeId passed in.
            // Per the original note, the check is needed when the hash is not
            // included in the span.
            if (!rowNodeId.isPresent() || rowNodeId.equals(nodeId)) {
                Iterator<ColumnValue> iter = colScanner.iterator();
                while (iter.hasNext()) {
                    // check before reading so the entry that exceeds the limit is left in place
                    if (count >= batchSize) {
                        batchLimitMet = true;
                        break;
                    }
                    ColumnValue colVal = iter.next();
                    tx.delete(row, colVal.getColumn());
                    count++;
                }
            }
        }
        if (batchLimitMet) {
            return Optional.of(new RowColumn(row));
        } else {
            return Optional.empty();
        }
    } catch (Exception e) {
        // Keep the best-effort contract (report "nothing further to delete"),
        // but record the failure so it is not lost silently.
        log.error("Failed to delete batch of size: " + batchSize + " using Span: " + span + " and Column: " + column, e);
        return Optional.empty();
    }
}
Also used : RowColumn(org.apache.fluo.api.data.RowColumn) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) Bytes(org.apache.fluo.api.data.Bytes) NodeType(org.apache.rya.indexing.pcj.fluo.app.NodeType) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue)

Example 8 with RowColumn

use of org.apache.fluo.api.data.RowColumn in project incubator-rya by apache.

the class JoinResultUpdater method fillSiblingBatch.

/**
 * Reads sibling binding sets to join with. The scan covers the given sibling
 * Span and Column; every value read is deserialized and added to
 * {@code bsSet}, and reading stops once the set holds at least
 * {@code batchSize} entries.
 * <p>
 * The size check happens after each value is added, so the row reported
 * back is the row of the last value that was read.
 *
 * @param tx - Fluo transaction used to perform the scan
 * @param siblingSpan - span of sibling to retrieve elements to join with
 * @param siblingColumn - column fetched during the sibling scan
 * @param bsSet - set that the deserialized results are added to
 * @param batchSize - number of entries in bsSet at which reading stops
 * @return Optional RowColumn made of the last row read and the sibling
 *         Column when the batch size was reached; absent otherwise
 * @throws Exception - declared by this method; presumably raised while
 *         deserializing a value (the serializer is not shown here)
 */
private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final Span siblingSpan, final Column siblingColumn, final Set<VisibilityBindingSet> bsSet, final int batchSize) throws Exception {
    final RowScanner siblingRows = tx.scanner().over(siblingSpan).fetch(siblingColumn).byRow().build();
    // placeholder only: it is always overwritten before the limit can be reached
    Bytes lastRow = siblingSpan.getStart().getRow();
    boolean full = false;
    for (final Iterator<ColumnScanner> rowIter = siblingRows.iterator(); rowIter.hasNext() && !full;) {
        final ColumnScanner rowEntries = rowIter.next();
        lastRow = rowEntries.getRow();
        for (final Iterator<ColumnValue> values = rowEntries.iterator(); values.hasNext() && !full;) {
            bsSet.add(BS_SERDE.deserialize(values.next().getValue()));
            // evaluated after the add, so the value just read counts toward the limit
            full = bsSet.size() >= batchSize;
        }
    }
    if (!full) {
        return Optional.absent();
    }
    return Optional.of(new RowColumn(lastRow, siblingColumn));
}
Also used : Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue)

Aggregations

RowColumn (org.apache.fluo.api.data.RowColumn)8 Column (org.apache.fluo.api.data.Column)6 Span (org.apache.fluo.api.data.Span)6 Bytes (org.apache.fluo.api.data.Bytes)5 ColumnScanner (org.apache.fluo.api.client.scanner.ColumnScanner)3 RowScanner (org.apache.fluo.api.client.scanner.RowScanner)3 ColumnValue (org.apache.fluo.api.data.ColumnValue)3 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)3 Task (org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task)3 VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder)3 JsonObject (com.google.gson.JsonObject)2 HashSet (java.util.HashSet)2 IterativeJoin (org.apache.rya.api.function.join.IterativeJoin)2 Side (org.apache.rya.api.function.join.LazyJoiningIterator.Side)2 LeftOuterJoin (org.apache.rya.api.function.join.LeftOuterJoin)2 NaturalJoin (org.apache.rya.api.function.join.NaturalJoin)2 NodeType (org.apache.rya.indexing.pcj.fluo.app.NodeType)1 JoinBatchInformation (org.apache.rya.indexing.pcj.fluo.app.batch.JoinBatchInformation)1 JoinMetadata (org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata)1 JoinType (org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata.JoinType)1