Search in sources :

Example 1 with AccumuloColumnConstraint

use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in project presto by prestodb.

the class AccumuloSplitManager method getSplits.

/**
 * Builds the split source for a scan of an Accumulo table layout.
 * <p>
 * The layout's constraint is separated into the row ID domain and the constraints on the
 * remaining columns. Both are handed to the client to obtain tablet split metadata, and each
 * metadata entry is then wrapped in an {@link AccumuloSplit}.
 *
 * @param transactionHandle Transaction handle (not used by this method)
 * @param session Connector session, forwarded to the client
 * @param layout Table layout; cast to {@link AccumuloTableLayoutHandle}
 * @return A fixed split source containing one split per tablet split metadata entry
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout) {
    AccumuloTableLayoutHandle accumuloLayout = (AccumuloTableLayoutHandle) layout;
    AccumuloTableHandle table = accumuloLayout.getTable();
    String schema = table.getSchema();
    String tableNameOnly = table.getTable();
    String rowIdColumn = table.getRowId();

    // Constraints on every column except the row ID
    List<AccumuloColumnConstraint> columnConstraints = getColumnConstraints(rowIdColumn, accumuloLayout.getConstraint());

    // Domain of the row ID column, if the constraint has one
    Optional<Domain> rowIdDomain = getRangeDomain(rowIdColumn, accumuloLayout.getConstraint());

    // Ask the client for the tablet split metadata, using the row ID domain and the secondary index
    List<TabletSplitMetadata> metadataList = client.getTabletSplits(session, schema, tableNameOnly, rowIdDomain, columnConstraints, table.getSerializerInstance());

    // Wrap every tablet split metadata entry in a connector split
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    for (TabletSplitMetadata metadata : metadataList) {
        splits.add(new AccumuloSplit(
                connectorId,
                schema,
                tableNameOnly,
                rowIdColumn,
                table.getSerializerClassName(),
                metadata.getRanges().stream().map(WrappedRange::new).collect(Collectors.toList()),
                columnConstraints,
                table.getScanAuthorizations(),
                metadata.getHostPort()));
    }
    return new FixedSplitSource(splits.build());
}
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) ImmutableList(com.google.common.collect.ImmutableList) TabletSplitMetadata(com.facebook.presto.accumulo.model.TabletSplitMetadata) AccumuloSplit(com.facebook.presto.accumulo.model.AccumuloSplit) WrappedRange(com.facebook.presto.accumulo.model.WrappedRange) AccumuloTableLayoutHandle(com.facebook.presto.accumulo.model.AccumuloTableLayoutHandle) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) AccumuloTableHandle(com.facebook.presto.accumulo.model.AccumuloTableHandle) ColumnDomain(com.facebook.presto.spi.predicate.TupleDomain.ColumnDomain) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit)

Example 2 with AccumuloColumnConstraint

use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in project presto by prestodb.

the class ColumnCardinalityCache method getCardinalities.

/**
 * Gets the cardinality for each {@link AccumuloColumnConstraint}.
 * The given constraints are expected to be indexed; this method does not check that they are.
 * <p>
 * One task per constraint is submitted to the executor service. The results are then polled at
 * the given interval until either every task has reported or the smallest cardinality seen so
 * far is at or below {@code earlyReturnThreshold}. On an early return the remaining tasks are
 * left running and their results are not included in the returned multimap.
 *
 * @param schema Schema name
 * @param table Table name
 * @param auths Scan authorizations
 * @param idxConstraintRangePairs Mapping of all ranges for a given constraint
 * @param earlyReturnThreshold Smallest acceptable cardinality to return early while other tasks complete
 * @param pollingDuration Duration for polling the cardinality completion service
 * @return An immutable multimap of cardinality to column constraint, sorted by cardinality from smallest to largest
 * @throws PrestoException If a cardinality task fails, or if the calling thread is interrupted while polling
 */
public Multimap<Long, AccumuloColumnConstraint> getCardinalities(String schema, String table, Authorizations auths, Multimap<AccumuloColumnConstraint, Range> idxConstraintRangePairs, long earlyReturnThreshold, Duration pollingDuration) {
    // Nothing to look up: return right away instead of sleeping for a whole polling interval
    if (idxConstraintRangePairs.isEmpty()) {
        return ImmutableMultimap.of();
    }

    // Submit tasks to the executor to fetch column cardinality, adding it to the Guava cache if necessary
    CompletionService<Pair<Long, AccumuloColumnConstraint>> executor = new ExecutorCompletionService<>(executorService);
    idxConstraintRangePairs.asMap().forEach((key, value) -> executor.submit(() -> {
        long cardinality = getColumnCardinality(schema, table, auths, key.getFamily(), key.getQualifier(), value);
        LOG.debug("Cardinality for column %s is %s", key.getName(), cardinality);
        return Pair.of(cardinality, key);
    }));

    // Create a multi map sorted by cardinality
    ListMultimap<Long, AccumuloColumnConstraint> cardinalityToConstraints = MultimapBuilder.treeKeys().arrayListValues().build();
    try {
        boolean earlyReturn = false;
        int numTasks = idxConstraintRangePairs.asMap().size();
        do {
            // Sleep for the polling duration to allow concurrent tasks to run for this time
            Thread.sleep(pollingDuration.toMillis());

            // Drain whatever has completed so far; poll() only ever hands back completed futures
            for (int i = 0; i < numTasks; ++i) {
                Future<Pair<Long, AccumuloColumnConstraint>> futureCardinality = executor.poll();
                if (futureCardinality != null) {
                    Pair<Long, AccumuloColumnConstraint> columnCardinality = futureCardinality.get();
                    cardinalityToConstraints.put(columnCardinality.getLeft(), columnCardinality.getRight());
                }
            }

            // If the smallest cardinality is present and at or below the threshold, set the earlyReturn flag
            Optional<Entry<Long, AccumuloColumnConstraint>> smallestCardinality = cardinalityToConstraints.entries().stream().findFirst();
            if (smallestCardinality.isPresent() && smallestCardinality.get().getKey() <= earlyReturnThreshold) {
                // Log the entry itself rather than the Optional wrapper
                LOG.info("Cardinality %s, is below threshold. Returning early while other tasks finish", smallestCardinality.get());
                earlyReturn = true;
            }
        } while (!earlyReturn && cardinalityToConstraints.entries().size() < numTasks);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can observe it
        Thread.currentThread().interrupt();
        throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting cardinality", e);
    } catch (ExecutionException e) {
        throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting cardinality", e);
    }

    // Create a copy of the cardinalities
    return ImmutableMultimap.copyOf(cardinalityToConstraints);
}
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) PrestoException(com.facebook.presto.spi.PrestoException) AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) Entry(java.util.Map.Entry) Long.parseLong(java.lang.Long.parseLong) ExecutionException(java.util.concurrent.ExecutionException) Pair(org.apache.commons.lang3.tuple.Pair)

Example 3 with AccumuloColumnConstraint

use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in project presto by prestodb.

the class IndexLookup method getIndexRanges.

/**
 * Scans the index table once per constrained column and intersects the resulting row IDs.
 * <p>
 * For every constraint a task is submitted that scans the index table over that constraint's
 * ranges and collects a {@link Range} for each column qualifier that passes
 * {@code inRange(qualifier, rowIDRanges)}. The per-column sets are then intersected, so a row ID
 * is only returned if every column's scan produced it.
 *
 * @param indexTable Name of the index table to scan
 * @param constraintRanges Index ranges to scan, grouped by column constraint
 * @param rowIDRanges Row ID ranges used to filter the qualifiers read from the index
 * @param auths Scan authorizations
 * @return The intersection of the per-column ranges; empty if any column matched nothing or if
 * there are no constraints
 * @throws PrestoException If a scan task fails or the calling thread is interrupted while waiting
 */
private List<Range> getIndexRanges(String indexTable, Multimap<AccumuloColumnConstraint, Range> constraintRanges, Collection<Range> rowIDRanges, Authorizations auths) {
    // For each column/constraint pair we submit a task to scan the index ranges
    List<Future<Set<Range>>> tasks = new ArrayList<>();
    CompletionService<Set<Range>> executor = new ExecutorCompletionService<>(executorService);
    for (Entry<AccumuloColumnConstraint, Collection<Range>> constraintEntry : constraintRanges.asMap().entrySet()) {
        tasks.add(executor.submit(() -> {
            // Create a batch scanner against the index table, setting the ranges
            BatchScanner scan = connector.createBatchScanner(indexTable, auths, 10);
            try {
                scan.setRanges(constraintEntry.getValue());
                // Fetch the column family for this specific column
                scan.fetchColumnFamily(new Text(Indexer.getIndexColumnFamily(constraintEntry.getKey().getFamily().getBytes(), constraintEntry.getKey().getQualifier().getBytes()).array()));
                // For each entry in the scanner
                Text tmpQualifier = new Text();
                Set<Range> columnRanges = new HashSet<>();
                for (Entry<Key, Value> entry : scan) {
                    entry.getKey().getColumnQualifier(tmpQualifier);
                    // Add to our column ranges if it is in one of the row ID ranges
                    if (inRange(tmpQualifier, rowIDRanges)) {
                        columnRanges.add(new Range(tmpQualifier));
                    }
                }
                LOG.debug("Retrieved %d ranges for index column %s", columnRanges.size(), constraintEntry.getKey().getName());
                return columnRanges;
            } finally {
                // Release the scanner even when the scan itself throws
                scan.close();
            }
        }));
    }

    Set<Range> finalRanges = new HashSet<>();
    // An explicit flag marks the first column. Testing finalRanges.isEmpty() instead would treat
    // an empty intersection as "nothing added yet" and wrongly admit the next column's row IDs.
    boolean firstColumn = true;
    for (Future<Set<Range>> task : tasks) {
        Set<Range> columnRanges;
        try {
            columnRanges = task.get();
        } catch (InterruptedException e) {
            // Restore the interrupt flag; an InterruptedException has no cause to unwrap
            Thread.currentThread().interrupt();
            throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting index ranges", e);
        } catch (ExecutionException e) {
            throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting index ranges", e.getCause());
        }

        if (firstColumn) {
            finalRanges.addAll(columnRanges);
            firstColumn = false;
        } else {
            // Retain only the row IDs for this column that have already been added
            // This is the set intersection operation
            finalRanges.retainAll(columnRanges);
        }
    }
    return ImmutableList.copyOf(finalRanges);
}
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) Set(java.util.Set) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) BatchScanner(org.apache.accumulo.core.client.BatchScanner) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Text(org.apache.hadoop.io.Text) PrestoException(com.facebook.presto.spi.PrestoException) Range(org.apache.accumulo.core.data.Range) Entry(java.util.Map.Entry) Future(java.util.concurrent.Future) Collection(java.util.Collection) ExecutionException(java.util.concurrent.ExecutionException) HashSet(java.util.HashSet)

Example 4 with AccumuloColumnConstraint

use of com.facebook.presto.accumulo.model.AccumuloColumnConstraint in project presto by prestodb.

the class AccumuloSplitManager method getSplits.

/**
 * Produces the splits for scanning an Accumulo table layout.
 * <p>
 * The row ID domain and the constraints on the other columns are both derived from the layout's
 * constraint and passed to the client, which returns the tablet split metadata. One
 * {@link AccumuloSplit} is created per metadata entry.
 *
 * @param transactionHandle Transaction handle (not used by this method)
 * @param session Connector session, forwarded to the client
 * @param layout Table layout; cast to {@link AccumuloTableLayoutHandle}
 * @param splitSchedulingContext Split scheduling context (not used by this method)
 * @return A fixed split source holding one split per tablet split metadata entry
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout, SplitSchedulingContext splitSchedulingContext) {
    AccumuloTableLayoutHandle accumuloLayout = (AccumuloTableLayoutHandle) layout;
    AccumuloTableHandle handle = accumuloLayout.getTable();
    String schema = handle.getSchema();
    String table = handle.getTable();
    String rowId = handle.getRowId();

    // Constraints on the columns other than the row ID
    List<AccumuloColumnConstraint> nonRowIdConstraints = getColumnConstraints(rowId, accumuloLayout.getConstraint());

    // Domain of the row ID column, when the constraint provides one
    Optional<Domain> rowIdDomain = getRangeDomain(rowId, accumuloLayout.getConstraint());

    // Retrieve the tablet split metadata from the client, using the row ID domain and the secondary index
    List<TabletSplitMetadata> tabletMetadata = client.getTabletSplits(session, schema, table, rowIdDomain, nonRowIdConstraints, handle.getSerializerInstance());

    // Turn each tablet split metadata entry into a connector split
    ImmutableList.Builder<ConnectorSplit> splitBuilder = ImmutableList.builder();
    tabletMetadata.forEach(metadata -> splitBuilder.add(new AccumuloSplit(
            connectorId,
            schema,
            table,
            rowId,
            handle.getSerializerClassName(),
            metadata.getRanges().stream().map(WrappedRange::new).collect(Collectors.toList()),
            nonRowIdConstraints,
            handle.getScanAuthorizations(),
            metadata.getHostPort())));
    return new FixedSplitSource(splitBuilder.build());
}
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) ImmutableList(com.google.common.collect.ImmutableList) TabletSplitMetadata(com.facebook.presto.accumulo.model.TabletSplitMetadata) AccumuloSplit(com.facebook.presto.accumulo.model.AccumuloSplit) WrappedRange(com.facebook.presto.accumulo.model.WrappedRange) AccumuloTableLayoutHandle(com.facebook.presto.accumulo.model.AccumuloTableLayoutHandle) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) AccumuloTableHandle(com.facebook.presto.accumulo.model.AccumuloTableHandle) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ColumnDomain(com.facebook.presto.common.predicate.TupleDomain.ColumnDomain) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit)

Aggregations

AccumuloColumnConstraint (com.facebook.presto.accumulo.model.AccumuloColumnConstraint)4 AccumuloSplit (com.facebook.presto.accumulo.model.AccumuloSplit)2 AccumuloTableHandle (com.facebook.presto.accumulo.model.AccumuloTableHandle)2 AccumuloTableLayoutHandle (com.facebook.presto.accumulo.model.AccumuloTableLayoutHandle)2 TabletSplitMetadata (com.facebook.presto.accumulo.model.TabletSplitMetadata)2 WrappedRange (com.facebook.presto.accumulo.model.WrappedRange)2 ConnectorSplit (com.facebook.presto.spi.ConnectorSplit)2 FixedSplitSource (com.facebook.presto.spi.FixedSplitSource)2 PrestoException (com.facebook.presto.spi.PrestoException)2 ImmutableList (com.google.common.collect.ImmutableList)2 Entry (java.util.Map.Entry)2 ExecutionException (java.util.concurrent.ExecutionException)2 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)2 Domain (com.facebook.presto.common.predicate.Domain)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 ColumnDomain (com.facebook.presto.common.predicate.TupleDomain.ColumnDomain)1 Domain (com.facebook.presto.spi.predicate.Domain)1 TupleDomain (com.facebook.presto.spi.predicate.TupleDomain)1 ColumnDomain (com.facebook.presto.spi.predicate.TupleDomain.ColumnDomain)1 Long.parseLong (java.lang.Long.parseLong)1