use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.
the class PeriodicNotificationBinPrunerIT method compareFluoCounts.
/**
 * Asserts that Fluo holds no results for the given periodic bin under any of
 * the node ids collected by
 * {@code PeriodicQueryUtil.getPeriodicQueryNodeAncestorIds} for the query
 * generated from {@code pcjId}.
 *
 * For each collected node id, the node type's result column is scanned over
 * the row prefix built from the node id and a binding of the periodic bin id
 * to {@code bin}; the number of values found must be zero.
 *
 * @param client - Fluo client used to open the snapshot that is scanned
 * @param pcjId - PCJ id from which the Fluo query node id is generated
 * @param bin - periodic bin id whose results are expected to be absent
 * @throws RuntimeException if a collected node id does not map to a NodeType
 */
private void compareFluoCounts(FluoClient client, String pcjId, long bin) {
    QueryBindingSet bs = new QueryBindingSet();
    bs.addBinding(IncrementalUpdateConstants.PERIODIC_BIN_ID, new LiteralImpl(Long.toString(bin), XMLSchema.LONG));
    VariableOrder varOrder = new VariableOrder(IncrementalUpdateConstants.PERIODIC_BIN_ID);
    try (Snapshot sx = client.newSnapshot()) {
        String fluoQueryId = NodeType.generateNewIdForType(NodeType.QUERY, pcjId);
        Set<String> ids = new HashSet<>();
        PeriodicQueryUtil.getPeriodicQueryNodeAncestorIds(sx, fluoQueryId, ids);
        for (String id : ids) {
            NodeType optNode = NodeType.fromNodeId(id).orNull();
            if (optNode == null) {
                throw new RuntimeException("Invalid NodeType.");
            }
            // Only rows starting with this node id + bin id prefix are scanned.
            Bytes prefix = RowKeyUtil.makeRowKey(id, varOrder, bs);
            RowScanner scanner = sx.scanner().fetch(optNode.getResultColumn()).over(Span.prefix(prefix)).byRow().build();
            int count = 0;
            for (ColumnScanner colScanner : scanner) {
                // The values themselves are irrelevant; only their number matters.
                for (ColumnValue ignored : colScanner) {
                    count++;
                }
            }
            // Name the node and bin so a failure identifies where results remain.
            Assert.assertEquals("Unexpected results for node " + id + " in bin " + bin, 0, count);
        }
    }
}
use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.
the class JoinBatchBindingSetUpdater method fillSiblingBatch.
/**
 * Scans the Span and Column named by the {@link JoinBatchInformation} and
 * deserializes the values found into {@code bsSet}. No value is added once
 * the set already holds the batch size given by the JoinBatchInformation.
 *
 * @param tx - Fluo transaction whose scanner performs the read
 * @param batch - supplies the Span, Column and batch size of the scan
 * @param bsSet - set that the deserialized results are added to
 * @return Optional - holds the RowColumn (row being read, batch column) at
 *         which a further value was found while the set was already at the
 *         batch size; that value has not been added. Empty if the scan
 *         finished without this happening.
 * @throws Exception
 */
private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final JoinBatchInformation batch, final Set<VisibilityBindingSet> bsSet) throws Exception {
    final Span scanSpan = batch.getSpan();
    final Column scanColumn = batch.getColumn();
    final int limit = batch.getBatchSize();

    final RowScanner rowScanner = tx.scanner().over(scanSpan).fetch(scanColumn).byRow().build();
    final Iterator<ColumnScanner> rows = rowScanner.iterator();

    boolean limitReached = false;
    Bytes currentRow = scanSpan.getStart().getRow();

    while (rows.hasNext() && !limitReached) {
        final ColumnScanner rowValues = rows.next();
        currentRow = rowValues.getRow();

        final Iterator<ColumnValue> values = rowValues.iterator();
        while (values.hasNext()) {
            if (bsSet.size() < limit) {
                bsSet.add(BS_SERDE.deserialize(values.next().getValue()));
            } else {
                // Set is full and another value is waiting: stop without consuming it.
                limitReached = true;
                break;
            }
        }
    }

    if (!limitReached) {
        return Optional.empty();
    }
    return Optional.of(new RowColumn(currentRow, scanColumn));
}
use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.
the class SpanBatchBindingSetUpdater method deleteBatch.
/**
 * Deletes at most {@code batchSize} column values found by scanning
 * {@code column} over {@code span}.
 *
 * When {@code nodeId} is present, the node id is extracted from each scanned
 * row and only rows whose node id equals {@code nodeId} are deleted; when it
 * is absent every scanned row is eligible.
 *
 * @param tx - Fluo transaction used for the scan and the deletes
 * @param nodeId - optional node id that scanned rows must match to be deleted
 * @param span - Span that is scanned
 * @param column - Column that is fetched by the scan
 * @param batchSize - maximum number of values deleted by this call
 * @return Optional - holds a RowColumn for the row being read when a further
 *         value was found after {@code batchSize} deletes had been issued.
 *         Empty if the scan finished first, or if an exception occurred (the
 *         exception is logged, not rethrown).
 */
private Optional<RowColumn> deleteBatch(TransactionBase tx, Optional<String> nodeId, Span span, Column column, int batchSize) {
    log.trace("Deleting batch of size: " + batchSize + " using Span: " + span + " and Column: " + column);
    RowScanner rs = tx.scanner().over(span).fetch(column).byRow().build();
    try {
        Iterator<ColumnScanner> colScannerIter = rs.iterator();
        int count = 0;
        boolean batchLimitMet = false;
        Bytes row = span.getStart().getRow();
        // get prefix if nodeId is specified
        Optional<Bytes> prefixBytes = Optional.empty();
        if (nodeId.isPresent()) {
            NodeType type = NodeType.fromNodeId(nodeId.get()).get();
            prefixBytes = Optional.ofNullable(Bytes.of(type.getNodeTypePrefix()));
        }
        while (colScannerIter.hasNext() && !batchLimitMet) {
            ColumnScanner colScanner = colScannerIter.next();
            row = colScanner.getRow();
            // extract the nodeId from the returned row if a nodeId was passed
            // into the SpanBatchInformation. This is to ensure that the returned
            // row nodeId is equal to the nodeId passed in to the span batch information
            Optional<String> rowNodeId = Optional.empty();
            if (prefixBytes.isPresent()) {
                rowNodeId = Optional.of(BindingSetRow.makeFromShardedRow(prefixBytes.get(), row).getNodeId());
            }
            // Delete the row's values when there is no nodeId to filter on, or
            // when the row's nodeId matches the requested one. A mismatch can
            // occur when the hash is not included in the span.
            if (!rowNodeId.isPresent() || rowNodeId.equals(nodeId)) {
                Iterator<ColumnValue> iter = colScanner.iterator();
                while (iter.hasNext()) {
                    // Stop before consuming the value that would exceed the batch.
                    if (count >= batchSize) {
                        batchLimitMet = true;
                        break;
                    }
                    ColumnValue colVal = iter.next();
                    tx.delete(row, colVal.getColumn());
                    count++;
                }
            }
        }
        if (batchLimitMet) {
            return Optional.of(new RowColumn(row));
        } else {
            return Optional.empty();
        }
    } catch (Exception e) {
        // Best effort: keep reporting "nothing further to delete", but record
        // the failure so it is not mistaken for a fully deleted span.
        log.error("Failed to delete batch using Span: " + span + " and Column: " + column, e);
        return Optional.empty();
    }
}
use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.
the class JoinResultUpdater method fillSiblingBatch.
/**
 * Scans {@code siblingColumn} over {@code siblingSpan} and deserializes the
 * values found into {@code bsSet}, stopping as soon as the set holds at
 * least {@code batchSize} elements.
 *
 * @param tx - Fluo transaction whose scanner performs the read
 * @param siblingSpan - span of sibling to retrieve elements to join with
 * @param siblingColumn - column of the sibling that is fetched by the scan
 * @param bsSet - set that the deserialized results are added to
 * @param batchSize - set size at which the scan stops
 * @return Optional - holds the RowColumn (row of the last value added,
 *         sibling column) when the set reached {@code batchSize}; absent if
 *         the scan finished first.
 * @throws Exception
 */
private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final Span siblingSpan, final Column siblingColumn, final Set<VisibilityBindingSet> bsSet, final int batchSize) throws Exception {
    final RowScanner siblingScanner = tx.scanner().over(siblingSpan).fetch(siblingColumn).byRow().build();
    final Iterator<ColumnScanner> siblingRows = siblingScanner.iterator();

    boolean batchFull = false;
    Bytes lastRow = siblingSpan.getStart().getRow();

    while (siblingRows.hasNext() && !batchFull) {
        final ColumnScanner siblingRow = siblingRows.next();
        lastRow = siblingRow.getRow();

        final Iterator<ColumnValue> values = siblingRow.iterator();
        while (values.hasNext() && !batchFull) {
            bsSet.add(BS_SERDE.deserialize(values.next().getValue()));
            // The flag is false whenever the loop body runs, so it can simply
            // take the result of the size check made after each addition.
            batchFull = bsSet.size() >= batchSize;
        }
    }

    if (!batchFull) {
        return Optional.absent();
    }
    return Optional.of(new RowColumn(lastRow, siblingColumn));
}
use of org.apache.fluo.api.client.scanner.ColumnScanner in project incubator-rya by apache.
the class CountStatements method countStatements.
/**
 * Counts the rows of the Fluo table that have an entry in the
 * {@code FluoQueryColumns.TRIPLES} column. Each such row is taken to be an
 * RDF Statement that has been loaded into the Fluo app but has not been
 * processed yet.
 *
 * @param fluo - The connection to Fluo that the snapshot is opened on. (not null)
 * @return The number of rows found, i.e. the number of RDF Statements that
 *   have been loaded into the Fluo app and not processed yet.
 */
public BigInteger countStatements(final FluoClient fluo) {
    checkNotNull(fluo);

    try (Snapshot snapshot = fluo.newSnapshot()) {
        BigInteger total = BigInteger.ZERO;

        // Scan row by row, restricted to the Triples binding set column.
        final Iterator<ColumnScanner> tripleRows = snapshot.scanner().fetch(FluoQueryColumns.TRIPLES).byRow().build().iterator();
        while (tripleRows.hasNext()) {
            // Only the presence of the row matters, not its contents.
            tripleRows.next();
            total = total.add(BigInteger.ONE);
        }

        return total;
    }
}
Aggregations