Search in sources :

Example 6 with ColumnScanner

use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.

the class PeriodicNotificationBinPrunerIT method compareFluoCounts.

/**
 * Asserts that Fluo holds no results for the given periodic bin.
 * <p>
 * For every node id collected by
 * {@code PeriodicQueryUtil.getPeriodicQueryNodeAncestorIds} for the query
 * derived from {@code pcjId}, this scans the node's result column over all
 * rows prefixed by that node id and the bin's binding set, and asserts that
 * the scan yields zero values.
 *
 * @param client - Fluo client used to open the snapshot that is scanned
 * @param pcjId - PCJ id from which the Fluo query id is generated
 * @param bin - periodic bin id whose results are expected to be absent
 * @throws RuntimeException if a collected node id does not map to a NodeType
 */
private void compareFluoCounts(FluoClient client, String pcjId, long bin) {
    // Binding set containing only the periodic bin id; combined with each
    // node id below to form the row prefix that is scanned.
    QueryBindingSet bs = new QueryBindingSet();
    bs.addBinding(IncrementalUpdateConstants.PERIODIC_BIN_ID, new LiteralImpl(Long.toString(bin), XMLSchema.LONG));
    VariableOrder varOrder = new VariableOrder(IncrementalUpdateConstants.PERIODIC_BIN_ID);
    try (Snapshot sx = client.newSnapshot()) {
        String fluoQueryId = NodeType.generateNewIdForType(NodeType.QUERY, pcjId);
        Set<String> ids = new HashSet<>();
        PeriodicQueryUtil.getPeriodicQueryNodeAncestorIds(sx, fluoQueryId, ids);
        for (String id : ids) {
            NodeType optNode = NodeType.fromNodeId(id).orNull();
            if (optNode == null) {
                throw new RuntimeException("Invalid NodeType.");
            }
            Bytes prefix = RowKeyUtil.makeRowKey(id, varOrder, bs);
            RowScanner scanner = sx.scanner().fetch(optNode.getResultColumn()).over(Span.prefix(prefix)).byRow().build();
            // Tally every value found under the prefix; only the number of
            // values matters, so the row and value contents are not inspected.
            int count = 0;
            for (ColumnScanner colScanner : scanner) {
                for (ColumnValue unused : colScanner) {
                    count++;
                }
            }
            Assert.assertEquals(0, count);
        }
    }
}
Also used : VariableOrder(org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) LiteralImpl(org.openrdf.model.impl.LiteralImpl) Snapshot(org.apache.fluo.api.client.Snapshot) Bytes(org.apache.fluo.api.data.Bytes) NodeType(org.apache.rya.indexing.pcj.fluo.app.NodeType) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue) HashSet(java.util.HashSet)

Example 7 with ColumnScanner

use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.

the class JoinBatchBindingSetUpdater method fillSiblingBatch.

/**
 * Scans the Span and Column described by the {@link JoinBatchInformation}
 * and deserializes the values found into {@code bsSet}. Scanning stops as
 * soon as another value is available while {@code bsSet} already holds at
 * least the batch size specified by the JoinBatchInformation.
 *
 * @param tx - Fluo transaction in which batch operation is performed
 * @param batch - batch order to be processed
 * @param bsSet - set that batch results are added to
 * @return the row being scanned when the batch limit was hit, paired with
 *         the batch column, or an empty Optional if the scan finished
 *         without hitting the limit
 * @throws Exception
 */
private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final JoinBatchInformation batch, final Set<VisibilityBindingSet> bsSet) throws Exception {
    final Span scanSpan = batch.getSpan();
    final Column scanColumn = batch.getColumn();
    final int limit = batch.getBatchSize();
    final Iterator<ColumnScanner> rowIter = tx.scanner().over(scanSpan).fetch(scanColumn).byRow().build().iterator();

    boolean limitReached = false;
    // Defaults to the start of the span until the scan yields a row.
    Bytes currentRow = scanSpan.getStart().getRow();
    while (rowIter.hasNext() && !limitReached) {
        final ColumnScanner columns = rowIter.next();
        currentRow = columns.getRow();
        final Iterator<ColumnValue> valueIter = columns.iterator();
        // The size check precedes the add, so the value that trips the
        // limit is left unread in the scanner.
        while (!limitReached && valueIter.hasNext()) {
            if (bsSet.size() >= limit) {
                limitReached = true;
            } else {
                bsSet.add(BS_SERDE.deserialize(valueIter.next().getValue()));
            }
        }
    }

    if (!limitReached) {
        return Optional.empty();
    }
    return Optional.of(new RowColumn(currentRow, scanColumn));
}
Also used : Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) Column(org.apache.fluo.api.data.Column) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue) Span(org.apache.fluo.api.data.Span)

Example 8 with ColumnScanner

use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.

the class SpanBatchBindingSetUpdater method deleteBatch.

/**
 * Deletes up to {@code batchSize} entries of {@code column} found while
 * scanning {@code span}.
 * <p>
 * When {@code nodeId} is present, the node id is extracted from each
 * scanned row and only rows whose node id equals {@code nodeId} are
 * deleted. When it is absent, every scanned row is eligible.
 *
 * @param tx - Fluo transaction in which the deletes are performed
 * @param nodeId - optional node id that scanned rows must match to be deleted
 * @param span - Span to scan for entries to delete
 * @param column - Column to fetch and delete
 * @param batchSize - maximum number of entries deleted by this call
 * @return the row being scanned when the batch limit was hit, or an empty
 *         Optional if the scan finished without hitting the limit or an
 *         exception occurred while scanning or deleting
 */
private Optional<RowColumn> deleteBatch(TransactionBase tx, Optional<String> nodeId, Span span, Column column, int batchSize) {
    log.trace("Deleting batch of size: " + batchSize + " using Span: " + span + " and Column: " + column);
    RowScanner rs = tx.scanner().over(span).fetch(column).byRow().build();
    try {
        Iterator<ColumnScanner> colScannerIter = rs.iterator();
        int count = 0;
        boolean batchLimitMet = false;
        Bytes row = span.getStart().getRow();
        // get prefix if nodeId is specified
        Optional<Bytes> prefixBytes = Optional.empty();
        if (nodeId.isPresent()) {
            NodeType type = NodeType.fromNodeId(nodeId.get()).get();
            prefixBytes = Optional.ofNullable(Bytes.of(type.getNodeTypePrefix()));
        }
        while (colScannerIter.hasNext() && !batchLimitMet) {
            ColumnScanner colScanner = colScannerIter.next();
            row = colScanner.getRow();
            // extract the nodeId from the returned row if a nodeId was passed
            // into the SpanBatchInformation.  This is to ensure that the returned
            // row nodeId is equal to the nodeId passed in to the span batch information
            Optional<String> rowNodeId = Optional.empty();
            if (prefixBytes.isPresent()) {
                rowNodeId = Optional.of(BindingSetRow.makeFromShardedRow(prefixBytes.get(), row).getNodeId());
            }
            // Delete from this row when no nodeId filter applies, or when the
            // nodeId extracted from the row matches the requested nodeId.
            if (!rowNodeId.isPresent() || rowNodeId.equals(nodeId)) {
                Iterator<ColumnValue> iter = colScanner.iterator();
                while (iter.hasNext()) {
                    // The limit is checked before consuming the next value, so
                    // the limit is only reported when more data remains.
                    if (count >= batchSize) {
                        batchLimitMet = true;
                        break;
                    }
                    ColumnValue colVal = iter.next();
                    tx.delete(row, colVal.getColumn());
                    count++;
                }
            }
        }
        if (batchLimitMet) {
            return Optional.of(new RowColumn(row));
        } else {
            return Optional.empty();
        }
    } catch (Exception e) {
        // Callers still get the best-effort empty result, but the failure
        // is logged so an aborted delete is no longer invisible.
        log.warn("Unable to delete batch using Span: " + span + " and Column: " + column, e);
        return Optional.empty();
    }
}
Also used : RowColumn(org.apache.fluo.api.data.RowColumn) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) Bytes(org.apache.fluo.api.data.Bytes) NodeType(org.apache.rya.indexing.pcj.fluo.app.NodeType) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue)

Example 9 with ColumnScanner

use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.

the class JoinResultUpdater method fillSiblingBatch.

/**
 * Scans the sibling Span and Column and deserializes the values found into
 * {@code bsSet}. Scanning stops once {@code bsSet} holds at least
 * {@code batchSize} elements.
 *
 * @param tx - Fluo transaction in which batch operation is performed
 * @param siblingSpan - span of sibling to retrieve elements to join with
 * @param siblingColumn - column of sibling that is fetched by the scan
 * @param bsSet - set that batch results are added to
 * @param batchSize - size of bsSet at which scanning stops
 * @return the last row read paired with the sibling column when the batch
 *         size was met, or an absent Optional otherwise
 * @throws Exception
 */
private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final Span siblingSpan, final Column siblingColumn, final Set<VisibilityBindingSet> bsSet, final int batchSize) throws Exception {
    final Iterator<ColumnScanner> rowIter = tx.scanner().over(siblingSpan).fetch(siblingColumn).byRow().build().iterator();

    boolean limitReached = false;
    // Defaults to the start of the span until the scan yields a row.
    Bytes lastRow = siblingSpan.getStart().getRow();
    while (rowIter.hasNext() && !limitReached) {
        final ColumnScanner columns = rowIter.next();
        lastRow = columns.getRow();
        final Iterator<ColumnValue> valueIter = columns.iterator();
        while (valueIter.hasNext() && !limitReached) {
            bsSet.add(BS_SERDE.deserialize(valueIter.next().getValue()));
            // The flag is re-evaluated after every add so the scan stops
            // on the element that fills the batch.
            limitReached = bsSet.size() >= batchSize;
        }
    }

    if (!limitReached) {
        return Optional.absent();
    }
    return Optional.of(new RowColumn(lastRow, siblingColumn));
}
Also used : Bytes(org.apache.fluo.api.data.Bytes) RowColumn(org.apache.fluo.api.data.RowColumn) RowScanner(org.apache.fluo.api.client.scanner.RowScanner) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) ColumnValue(org.apache.fluo.api.data.ColumnValue)

Example 10 with ColumnScanner

use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.

the class CountStatements method countStatements.

/**
 * Counts the RDF Statements that have been loaded into the Fluo app but
 * have not been processed yet, by tallying the rows returned from a scan
 * restricted to the Triples column.
 *
 * @param fluo - The connection to Fluo that will be used to fetch the metadata. (not null)
 * @return The number of RDF Statements that have been loaded into the Fluo
 *   app that have not been processed yet.
 */
public BigInteger countStatements(final FluoClient fluo) {
    checkNotNull(fluo);
    try (Snapshot snapshot = fluo.newSnapshot()) {
        BigInteger total = BigInteger.ZERO;
        // Only the Triples column is fetched; each row the scan yields adds
        // one to the total.
        for (final Iterator<ColumnScanner> rowIter = snapshot.scanner().fetch(FluoQueryColumns.TRIPLES).byRow().build().iterator();
                rowIter.hasNext();
                total = total.add(BigInteger.ONE)) {
            rowIter.next();
        }
        return total;
    }
}
Also used : Snapshot(org.apache.fluo.api.client.Snapshot) ColumnScanner(org.apache.fluo.api.client.scanner.ColumnScanner) BigInteger(java.math.BigInteger)

Aggregations

ColumnScanner (org.apache.fluo.api.client.scanner.ColumnScanner)10 RowScanner (org.apache.fluo.api.client.scanner.RowScanner)9 Bytes (org.apache.fluo.api.data.Bytes)8 ColumnValue (org.apache.fluo.api.data.ColumnValue)6 Snapshot (org.apache.fluo.api.client.Snapshot)4 NodeType (org.apache.rya.indexing.pcj.fluo.app.NodeType)4 RowColumn (org.apache.fluo.api.data.RowColumn)3 BigInteger (java.math.BigInteger)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 Transaction (org.apache.fluo.api.client.Transaction)1 Column (org.apache.fluo.api.data.Column)1 Span (org.apache.fluo.api.data.Span)1 BindingSetRow (org.apache.rya.indexing.pcj.fluo.app.BindingSetRow)1 PeriodicQueryMetadata (org.apache.rya.indexing.pcj.fluo.app.query.PeriodicQueryMetadata)1 VariableOrder (org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder)1 LiteralImpl (org.openrdf.model.impl.LiteralImpl)1 QueryBindingSet (org.openrdf.query.algebra.evaluation.QueryBindingSet)1