Use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in the presto project by prestodb.
Class AccumuloSplitManager, method getSplits.
/**
 * Produces the splits for a scan of an Accumulo table.
 * <p>
 * The layout handle supplies the table and its constraint. The constraint is separated into the
 * row ID domain and the constraints on the remaining columns, both of which are handed to the
 * client to obtain tablet split metadata. Each metadata entry is then turned into one
 * {@link AccumuloSplit}.
 *
 * @param transactionHandle Transaction handle (not referenced by this implementation)
 * @param session Session passed through to the client when fetching tablet splits
 * @param layout Table layout; cast to {@link AccumuloTableLayoutHandle}
 * @return A fixed split source holding one split per tablet split metadata entry
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout) {
    AccumuloTableLayoutHandle accumuloLayout = (AccumuloTableLayoutHandle) layout;
    AccumuloTableHandle table = accumuloLayout.getTable();

    String schema = table.getSchema();
    String name = table.getTable();
    String rowId = table.getRowId();

    // Constraints on the columns other than the row ID
    List<AccumuloColumnConstraint> columnConstraints = getColumnConstraints(rowId, accumuloLayout.getConstraint());

    // Domain of the row ID column, if there is one
    Optional<Domain> rowIdDomain = getRangeDomain(rowId, accumuloLayout.getConstraint());

    // The client resolves the tablet split metadata from the row ID domain and the column constraints
    List<TabletSplitMetadata> tabletMetadata = client.getTabletSplits(session, schema, name, rowIdDomain, columnConstraints, table.getSerializerInstance());

    // One connector split per tablet split metadata entry
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    for (TabletSplitMetadata metadata : tabletMetadata) {
        splits.add(new AccumuloSplit(
                connectorId,
                schema,
                name,
                rowId,
                table.getSerializerClassName(),
                metadata.getRanges().stream().map(WrappedRange::new).collect(Collectors.toList()),
                columnConstraints,
                table.getScanAuthorizations(),
                metadata.getHostPort()));
    }
    return new FixedSplitSource(splits.build());
}
Use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in the presto project by prestodb.
Class ColumnCardinalityCache, method getCardinalities.
/**
 * Gets the cardinality for each {@link AccumuloColumnConstraint}.
 * Given constraints are expected to be indexed.
 * <p>
 * One task per constraint is submitted to the executor service. The calling thread then
 * alternates between sleeping for the polling duration and collecting whatever tasks have
 * completed. It returns as soon as either every task has reported, or the smallest cardinality
 * collected so far is at or below the early return threshold. In the latter case the remaining
 * tasks are left running and their results are not included in the returned multimap.
 *
 * @param schema Schema name
 * @param table Table name
 * @param auths Scan authorizations
 * @param idxConstraintRangePairs Mapping of all ranges for a given constraint
 * @param earlyReturnThreshold Smallest acceptable cardinality to return early while other tasks complete
 * @param pollingDuration Duration for polling the cardinality completion service
 * @return An immutable multimap of cardinality to column constraint, sorted by cardinality from smallest to largest;
 * empty if no constraints were given
 * @throws PrestoException With UNEXPECTED_ACCUMULO_ERROR if a cardinality task fails or the calling thread is
 * interrupted while waiting (the interrupt flag is restored before throwing)
 */
public Multimap<Long, AccumuloColumnConstraint> getCardinalities(String schema, String table, Authorizations auths, Multimap<AccumuloColumnConstraint, Range> idxConstraintRangePairs, long earlyReturnThreshold, Duration pollingDuration) {
    // Nothing to look up: return right away rather than sleeping for a full polling interval
    if (idxConstraintRangePairs.isEmpty()) {
        return ImmutableMultimap.of();
    }

    // Submit tasks to the executor to fetch column cardinality, adding it to the Guava cache if necessary
    CompletionService<Pair<Long, AccumuloColumnConstraint>> executor = new ExecutorCompletionService<>(executorService);
    idxConstraintRangePairs.asMap().forEach((key, value) -> executor.submit(() -> {
        long cardinality = getColumnCardinality(schema, table, auths, key.getFamily(), key.getQualifier(), value);
        LOG.debug("Cardinality for column %s is %s", key.getName(), cardinality);
        return Pair.of(cardinality, key);
    }));

    // Create a multi map sorted by cardinality
    ListMultimap<Long, AccumuloColumnConstraint> cardinalityToConstraints = MultimapBuilder.treeKeys().arrayListValues().build();
    try {
        boolean earlyReturn = false;
        // One task was submitted per distinct constraint
        int numTasks = idxConstraintRangePairs.asMap().size();
        do {
            // Sleep for the polling duration to allow concurrent tasks to run for this time
            Thread.sleep(pollingDuration.toMillis());

            // Drain up to numTasks completed tasks; poll() only ever hands back finished futures
            // (or null when none are ready), so get() does not block here
            for (int i = 0; i < numTasks; ++i) {
                Future<Pair<Long, AccumuloColumnConstraint>> futureCardinality = executor.poll();
                if (futureCardinality != null) {
                    Pair<Long, AccumuloColumnConstraint> columnCardinality = futureCardinality.get();
                    cardinalityToConstraints.put(columnCardinality.getLeft(), columnCardinality.getRight());
                }
            }

            // Keys are sorted, so the first entry carries the smallest cardinality seen so far.
            // If it is present and at or below the threshold, set the earlyReturn flag
            Optional<Entry<Long, AccumuloColumnConstraint>> smallestCardinality = cardinalityToConstraints.entries().stream().findFirst();
            if (smallestCardinality.isPresent()) {
                if (smallestCardinality.get().getKey() <= earlyReturnThreshold) {
                    LOG.info("Cardinality %s, is below threshold. Returning early while other tasks finish", smallestCardinality);
                    earlyReturn = true;
                }
            }
        }
        while (!earlyReturn && cardinalityToConstraints.size() < numTasks);
    }
    catch (ExecutionException | InterruptedException e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag for callers further up the stack
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting cardinality", e);
    }

    // Create a copy of the cardinalities
    return ImmutableMultimap.copyOf(cardinalityToConstraints);
}
Use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in the presto project by prestodb.
Class IndexLookup, method getIndexRanges.
/**
 * Scans the index table once per constrained column and returns the intersection of the
 * resulting row ID ranges.
 * <p>
 * A task is submitted to the executor service for every constraint. Each task batch-scans the
 * index table over that constraint's ranges, restricted to the column's index column family,
 * and keeps the column qualifiers that fall inside one of the given row ID ranges. The
 * per-column sets are then intersected, so a column that matches nothing yields an empty result.
 *
 * @param indexTable Name of the index table to scan
 * @param constraintRanges Mapping of each column constraint to the index ranges to scan for it
 * @param rowIDRanges Row ID ranges that scanned entries are checked against
 * @param auths Scan authorizations
 * @return An immutable list of the ranges present for every constraint; empty if there are no
 * constraints or no range satisfies all of them
 * @throws PrestoException With UNEXPECTED_ACCUMULO_ERROR if a scan task fails or the calling
 * thread is interrupted while waiting for a task
 */
private List<Range> getIndexRanges(String indexTable, Multimap<AccumuloColumnConstraint, Range> constraintRanges, Collection<Range> rowIDRanges, Authorizations auths) {
    // For each column/constraint pair we submit a task to scan the index ranges
    List<Future<Set<Range>>> tasks = new ArrayList<>();
    CompletionService<Set<Range>> executor = new ExecutorCompletionService<>(executorService);
    for (Entry<AccumuloColumnConstraint, Collection<Range>> constraintEntry : constraintRanges.asMap().entrySet()) {
        tasks.add(executor.submit(() -> {
            // Create a batch scanner against the index table, setting the ranges
            BatchScanner scan = connector.createBatchScanner(indexTable, auths, 10);
            try {
                scan.setRanges(constraintEntry.getValue());

                // Fetch the column family for this specific column
                scan.fetchColumnFamily(new Text(Indexer.getIndexColumnFamily(constraintEntry.getKey().getFamily().getBytes(), constraintEntry.getKey().getQualifier().getBytes()).array()));

                // For each entry in the scanner
                Text tmpQualifier = new Text();
                Set<Range> columnRanges = new HashSet<>();
                for (Entry<Key, Value> entry : scan) {
                    entry.getKey().getColumnQualifier(tmpQualifier);

                    // Add to our column ranges if it is in one of the row ID ranges
                    if (inRange(tmpQualifier, rowIDRanges)) {
                        columnRanges.add(new Range(tmpQualifier));
                    }
                }

                LOG.debug("Retrieved %d ranges for index column %s", columnRanges.size(), constraintEntry.getKey().getName());
                return columnRanges;
            }
            finally {
                // Release the scanner even when the scan fails part-way through
                scan.close();
            }
        }));
    }

    Set<Range> finalRanges = new HashSet<>();
    // Tracks whether any column has been merged yet. An empty finalRanges cannot be used for
    // this: an intersection that is already empty must stay empty, not be re-seeded by the
    // next column's ranges.
    boolean firstColumn = true;
    for (Future<Set<Range>> future : tasks) {
        try {
            Set<Range> columnRanges = future.get();
            if (firstColumn) {
                finalRanges.addAll(columnRanges);
                firstColumn = false;
            }
            else {
                // Retain only the row IDs for this column that have already been added
                // This is the set intersection operation
                finalRanges.retainAll(columnRanges);
            }
        }
        catch (ExecutionException e) {
            // Surface the failure raised inside the scan task rather than the wrapper
            throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting index ranges", e.getCause());
        }
        catch (InterruptedException e) {
            // Restore the interrupt flag and keep the interrupt itself as the cause
            Thread.currentThread().interrupt();
            throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting index ranges", e);
        }
    }
    return ImmutableList.copyOf(finalRanges);
}
Use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in the presto project by prestodb.
Class AccumuloSplitManager, method getSplits.
/**
 * Produces the splits for a scan of an Accumulo table.
 * <p>
 * The layout handle supplies the table and its constraint. The constraint is separated into the
 * row ID domain and the constraints on the remaining columns, both of which are handed to the
 * client to obtain tablet split metadata. Each metadata entry is then turned into one
 * {@link AccumuloSplit}.
 *
 * @param transactionHandle Transaction handle (not referenced by this implementation)
 * @param session Session passed through to the client when fetching tablet splits
 * @param layout Table layout; cast to {@link AccumuloTableLayoutHandle}
 * @param splitSchedulingContext Split scheduling context (not referenced by this implementation)
 * @return A fixed split source holding one split per tablet split metadata entry
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout, SplitSchedulingContext splitSchedulingContext) {
    AccumuloTableLayoutHandle accumuloLayout = (AccumuloTableLayoutHandle) layout;
    AccumuloTableHandle table = accumuloLayout.getTable();

    String schema = table.getSchema();
    String name = table.getTable();
    String rowId = table.getRowId();

    // Constraints on the columns other than the row ID
    List<AccumuloColumnConstraint> columnConstraints = getColumnConstraints(rowId, accumuloLayout.getConstraint());

    // Domain of the row ID column, if there is one
    Optional<Domain> rowIdDomain = getRangeDomain(rowId, accumuloLayout.getConstraint());

    // The client resolves the tablet split metadata from the row ID domain and the column constraints
    List<TabletSplitMetadata> tabletMetadata = client.getTabletSplits(session, schema, name, rowIdDomain, columnConstraints, table.getSerializerInstance());

    // One connector split per tablet split metadata entry
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    for (TabletSplitMetadata metadata : tabletMetadata) {
        splits.add(new AccumuloSplit(
                connectorId,
                schema,
                name,
                rowId,
                table.getSerializerClassName(),
                metadata.getRanges().stream().map(WrappedRange::new).collect(Collectors.toList()),
                columnConstraints,
                table.getScanAuthorizations(),
                metadata.getHostPort()));
    }
    return new FixedSplitSource(splits.build());
}
Aggregations