Search in sources:

Example 1 with TokenRange

use of com.datastax.driver.core.TokenRange in project cassandra by apache.

the class CqlInputFormat method getSplits.

/**
 * Computes the Hadoop input splits for the configured keyspace and column family.
 *
 * <p>The keyspace, column family and partitioner are read from the job configuration and stored
 * in {@code keyspace}, {@code cfName} and {@code partitioner}. One {@code SplitCallable} is
 * submitted per token range returned by {@code getRangeMap}; when an input key range is
 * configured, only the intersections of each token range with that key range are submitted.
 * The resulting splits are collected and returned in shuffled order.
 *
 * @param context the job context whose configuration describes the input
 * @return the shuffled list of input splits
 * @throws IOException if any split computation fails or the waiting thread is interrupted
 */
public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);
    validateConfiguration(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    logger.trace("partitioner is {}", partitioner);
    // canonical ranges, split into pieces, fetching the splits in parallel
    // NOTE(review): per the ThreadPoolExecutor documentation, an unbounded work queue means no more
    // than corePoolSize threads are ever created, so maximumPoolSize=128 has no effect here and the
    // tasks most likely run on a single worker. Confirm that SplitCallable is safe to run
    // concurrently before raising the core pool size.
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
    List<org.apache.hadoop.mapreduce.InputSplit> splits = new ArrayList<>();
    try (Cluster cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf);
        Session session = cluster.connect()) {
        List<Future<List<org.apache.hadoop.mapreduce.InputSplit>>> splitfutures = new ArrayList<>();
        Pair<String, String> jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.left), partitioner.getTokenFactory().fromString(jobKeyRange.right));
        }
        Metadata metadata = cluster.getMetadata();
        // canonical ranges and nodes holding replicas
        Map<TokenRange, Set<Host>> masterRangeNodes = getRangeMap(keyspace, metadata);
        // The job range does not depend on the token range being visited, so convert it once
        // instead of once per loop iteration.
        TokenRange jobTokenRange = jobRange == null ? null : rangeToTokenRange(metadata, jobRange);
        for (Map.Entry<TokenRange, Set<Host>> rangeNodes : masterRangeNodes.entrySet()) {
            TokenRange range = rangeNodes.getKey();
            Set<Host> hosts = rangeNodes.getValue();
            if (jobRange == null) {
                // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, hosts, conf, session)));
            } else if (range.intersects(jobTokenRange)) {
                for (TokenRange intersection : range.intersectWith(jobTokenRange)) {
                    // for each tokenRange, pick a live owner and ask it to compute bite-sized splits
                    splitfutures.add(executor.submit(new SplitCallable(intersection, hosts, conf, session)));
                }
            }
        }
        // wait until we have all the results back
        for (Future<List<org.apache.hadoop.mapreduce.InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers further up can still observe the interruption.
                Thread.currentThread().interrupt();
                throw new IOException("Could not get input splits", e);
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        // Cancel anything still queued or running; results are no longer needed past this point.
        executor.shutdownNow();
    }
    assert splits.size() > 0;
    // Randomize the order of the returned splits.
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
Also used : ResultSet(com.datastax.driver.core.ResultSet) Configuration(org.apache.hadoop.conf.Configuration) Metadata(com.datastax.driver.core.Metadata) org.apache.cassandra.hadoop(org.apache.cassandra.hadoop) InputSplit(org.apache.hadoop.mapred.InputSplit) Cluster(com.datastax.driver.core.Cluster) IOException(java.io.IOException) TokenRange(com.datastax.driver.core.TokenRange) Session(com.datastax.driver.core.Session)

Example 2 with TokenRange

use of com.datastax.driver.core.TokenRange in project cassandra by apache.

the class CqlInputFormat method describeSplits.

/**
 * Splits a token range into sub-ranges using the size estimates stored for it.
 *
 * <p>Looks up {@code mean_partition_size} and {@code partitions_count} for the exact
 * (keyspace, table, range start, range end) tuple. The number of sub-ranges is derived from
 * {@code splitSizeMb} when it is positive, otherwise from {@code splitSize}. When no estimate
 * row exists or the derived count is zero, the range is returned unsplit with a length of 128.
 *
 * @param keyspace    keyspace name bound into the estimates query
 * @param table       table name bound into the estimates query
 * @param tokenRange  the range to subdivide
 * @param splitSize   target number of partitions per split (used when {@code splitSizeMb <= 0})
 * @param splitSizeMb target split size in megabytes (used when positive)
 * @param session     session used to run the estimates query
 * @return a map from each resulting sub-range to its estimated partition count
 */
private Map<TokenRange, Long> describeSplits(String keyspace, String table, TokenRange tokenRange, int splitSize, int splitSizeMb, Session session) {
    String estimatesCql = String.format(
            "SELECT mean_partition_size, partitions_count "
                    + "FROM %s.%s "
                    + "WHERE keyspace_name = ? AND table_name = ? AND range_start = ? AND range_end = ?",
            SchemaConstants.SYSTEM_KEYSPACE_NAME,
            SystemKeyspace.SIZE_ESTIMATES);
    Row estimate = session.execute(estimatesCql, keyspace, table, tokenRange.getStart().toString(), tokenRange.getEnd().toString()).one();

    long partitions = 0;
    int pieces = 0;
    if (estimate != null) {
        long meanSize = estimate.getLong("mean_partition_size");
        partitions = estimate.getLong("partitions_count");
        if (splitSizeMb > 0) {
            // Size-based splitting: total estimated bytes divided by the target size in bytes.
            pieces = (int) (meanSize * partitions / splitSizeMb / 1024 / 1024);
        } else {
            // Count-based splitting: partitions divided by the target partitions per split.
            pieces = (int) (partitions / splitSize);
        }
    }

    Map<TokenRange, Long> lengthsByRange = new HashMap<>();
    // Assume smallest granularity of partition count available from CASSANDRA-7688
    if (pieces == 0) {
        lengthsByRange.put(tokenRange, (long) 128);
        return lengthsByRange;
    }

    // Every sub-range is assigned the same share of the estimated partition count.
    long partitionsPerPiece = partitions / pieces;
    for (TokenRange piece : tokenRange.splitEvenly(pieces)) {
        lengthsByRange.put(piece, partitionsPerPiece);
    }
    return lengthsByRange;
}
Also used : ResultSet(com.datastax.driver.core.ResultSet) TokenRange(com.datastax.driver.core.TokenRange) Row(com.datastax.driver.core.Row)

Example 3 with TokenRange

use of com.datastax.driver.core.TokenRange in project presto by prestodb.

the class CassandraTokenSplitManager method getSplits.

/**
 * Builds the token splits for a table.
 *
 * <p>Wrap-around token ranges are unwrapped first and empty ranges are skipped. When no token
 * ring is available for the partitioner, each range becomes exactly one split. Otherwise the
 * number of partitions in a range is estimated as the table's total partitions count multiplied
 * by the range's ring fraction, and the range is divided evenly into
 * {@code max(estimate / splitSize, 1)} sub-ranges, each becoming a split.
 *
 * @param keyspace keyspace of the table
 * @param table    table name
 * @return an unmodifiable, shuffled list of splits
 * @throws PrestoException if no token ranges are available from the cluster metadata
 */
public List<TokenSplit> getSplits(String keyspace, String table) {
    Set<TokenRange> tokenRanges = getTokenRanges();
    if (tokenRanges.isEmpty()) {
        throw new PrestoException(CASSANDRA_METADATA_ERROR, "The cluster metadata is not available. " + "Please make sure that the Cassandra cluster is up and running, " + "and that the contact points are specified correctly.");
    }
    if (tokenRanges.stream().anyMatch(TokenRange::isWrappedAround)) {
        tokenRanges = unwrap(tokenRanges);
    }
    Optional<TokenRing> tokenRing = createForPartitioner(getPartitioner());
    long totalPartitionsCount = getTotalPartitionsCount(keyspace, table);
    List<TokenSplit> splits = new ArrayList<>();
    for (TokenRange tokenRange : tokenRanges) {
        if (tokenRange.isEmpty()) {
            continue;
        }
        checkState(!tokenRange.isWrappedAround(), "all token ranges must be unwrapped at this step");
        List<String> endpoints = getEndpoints(keyspace, tokenRange);
        checkState(!endpoints.isEmpty(), "endpoints is empty for token range: %s", tokenRange);
        if (!tokenRing.isPresent()) {
            // Without a token ring the range's share of the data cannot be estimated,
            // so the whole range is emitted as a single split.
            splits.add(createSplit(tokenRange, endpoints));
            continue;
        }
        double tokenRangeRingFraction = tokenRing.get().getRingFraction(tokenRange.getStart().toString(), tokenRange.getEnd().toString());
        long partitionsCountEstimate = round(totalPartitionsCount * tokenRangeRingFraction);
        // Assuming checkState is Guava's Preconditions.checkState: its message templates only
        // substitute %s, so %s (not %d) is required for the estimate to appear in the message.
        checkState(partitionsCountEstimate >= 0, "unexpected partitions count estimate: %s", partitionsCountEstimate);
        // Always produce at least one sub-range, even when the estimate is below splitSize.
        int subSplitCount = max(toIntExact(partitionsCountEstimate / splitSize), 1);
        List<TokenRange> subRanges = tokenRange.splitEvenly(subSplitCount);
        for (TokenRange subRange : subRanges) {
            if (subRange.isEmpty()) {
                continue;
            }
            checkState(!subRange.isWrappedAround(), "all token ranges must be unwrapped at this step");
            splits.add(createSplit(subRange, endpoints));
        }
    }
    shuffle(splits, ThreadLocalRandom.current());
    return unmodifiableList(splits);
}
Also used : ArrayList(java.util.ArrayList) PrestoException(com.facebook.presto.spi.PrestoException) TokenRange(com.datastax.driver.core.TokenRange)

Aggregations

TokenRange (com.datastax.driver.core.TokenRange)3 ResultSet (com.datastax.driver.core.ResultSet)2 Cluster (com.datastax.driver.core.Cluster)1 Metadata (com.datastax.driver.core.Metadata)1 Row (com.datastax.driver.core.Row)1 Session (com.datastax.driver.core.Session)1 PrestoException (com.facebook.presto.spi.PrestoException)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 org.apache.cassandra.hadoop (org.apache.cassandra.hadoop)1 Configuration (org.apache.hadoop.conf.Configuration)1 InputSplit (org.apache.hadoop.mapred.InputSplit)1