Example 21 with ConsumerConfig

use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.

the class LineageWriterDataFabricFacade method createConsumer.

@Override
public QueueConsumer createConsumer(QueueName queueName, ConsumerConfig consumerConfig, int numGroups) throws IOException {
    QueueConsumer consumer = queueClientFactory.createConsumer(queueName, consumerConfig, numGroups);
    if (consumer instanceof TransactionAware) {
        consumer = new CloseableQueueConsumer(datasetCache, consumer);
        datasetCache.addExtraTransactionAware((TransactionAware) consumer);
    }
    return consumer;
}
Also used : QueueConsumer(co.cask.cdap.data2.queue.QueueConsumer) TransactionAware(org.apache.tephra.TransactionAware)
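
The ConsumerConfig passed into this facade bundles a group id, instance id, group size, dequeue strategy, and an optional hash key (all five are read via getters in the examples below). A minimal construction sketch; the five-argument constructor shape here is an assumption, and all values are illustrative:

import co.cask.cdap.data2.queue.ConsumerConfig;
import co.cask.cdap.data2.queue.DequeueStrategy;

// Assumed constructor shape: (groupId, instanceId, groupSize, strategy, hashKey).
// Illustrative values: a single-instance FIFO consumer in group 0; FIFO needs
// no hash key, so the last argument is null.
ConsumerConfig consumerConfig = new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null);

The instanceof check in the method above matters because only transaction-aware consumers need to be registered with the dataset cache; any other consumer is returned as-is.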

Example 22 with ConsumerConfig

use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.

the class FlowletProgramRunner method processSpecificationFactory.

private ProcessSpecificationFactory processSpecificationFactory(
        final BasicFlowletContext flowletContext, final DataFabricFacade dataFabricFacade,
        final QueueReaderFactory queueReaderFactory, final String flowletName,
        final Table<Node, String, Set<QueueSpecification>> queueSpecs,
        final ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder,
        final SchemaCache schemaCache) {
    return new ProcessSpecificationFactory() {

        @Override
        public <T> ProcessSpecification create(Set<String> inputNames, Schema schema, TypeToken<T> dataType, ProcessMethod<T> method, ConsumerConfig consumerConfig, int batchSize, Tick tickAnnotation) throws Exception {
            List<QueueReader<T>> queueReaders = Lists.newLinkedList();
            for (Map.Entry<Node, Set<QueueSpecification>> entry : queueSpecs.column(flowletName).entrySet()) {
                for (QueueSpecification queueSpec : entry.getValue()) {
                    final QueueName queueName = queueSpec.getQueueName();
                    if (queueSpec.getInputSchema().equals(schema) && (inputNames.contains(queueName.getSimpleName()) || inputNames.contains(FlowletDefinition.ANY_INPUT))) {
                        Node sourceNode = entry.getKey();
                        if (sourceNode.getType() == FlowletConnection.Type.STREAM) {
                            ConsumerSupplier<StreamConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig);
                            queueConsumerSupplierBuilder.add(consumerSupplier);
                            // No decoding is needed: a process method consuming from a stream can only take StreamEvent as its input type
                            Function<StreamEvent, T> decoder = wrapInputDecoder(flowletContext, null, queueName, new Function<StreamEvent, T>() {

                                @Override
                                @SuppressWarnings("unchecked")
                                public T apply(StreamEvent input) {
                                    return (T) input;
                                }
                            });
                            queueReaders.add(queueReaderFactory.createStreamReader(queueName.toStreamId(), consumerSupplier, batchSize, decoder));
                        } else {
                            int numGroups = getNumGroups(Iterables.concat(queueSpecs.row(entry.getKey()).values()), queueName);
                            Function<ByteBuffer, T> decoder = wrapInputDecoder(
                                flowletContext,
                                // the name of the producer flowlet
                                entry.getKey().getName(),
                                queueName, createInputDatumDecoder(dataType, schema, schemaCache));
                            ConsumerSupplier<QueueConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry, dataFabricFacade, queueName, consumerConfig, numGroups);
                            queueConsumerSupplierBuilder.add(consumerSupplier);
                            queueReaders.add(queueReaderFactory.createQueueReader(consumerSupplier, batchSize, decoder));
                        }
                    }
                }
            }
            // If inputs are needed but no input queue is available, return null
            if (!inputNames.isEmpty() && queueReaders.isEmpty()) {
                return null;
            }
            return new ProcessSpecification<>(new RoundRobinQueueReader<>(queueReaders), method, tickAnnotation);
        }
    };
}
Also used :
QueueReader (co.cask.cdap.app.queue.QueueReader)
RoundRobinQueueReader (co.cask.cdap.internal.app.queue.RoundRobinQueueReader)
Set (java.util.Set)
ImmutableSet (com.google.common.collect.ImmutableSet)
Schema (co.cask.cdap.api.data.schema.Schema)
Node (co.cask.cdap.app.queue.QueueSpecificationGenerator.Node)
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)
Tick (co.cask.cdap.api.annotation.Tick)
QueueName (co.cask.cdap.common.queue.QueueName)
StreamConsumer (co.cask.cdap.data2.transaction.stream.StreamConsumer)
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)
ByteBuffer (java.nio.ByteBuffer)
QueueConsumer (co.cask.cdap.data2.queue.QueueConsumer)
TypeToken (com.google.common.reflect.TypeToken)
QueueSpecification (co.cask.cdap.app.queue.QueueSpecification)
Map (java.util.Map)
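
The factory collects one QueueReader per matching input and wires them into a RoundRobinQueueReader, which rotates across the inputs so that no single queue starves the others. A minimal sketch of that rotation idea (not CDAP's implementation; Supplier stands in for the real reader type):

import java.util.List;
import java.util.function.Supplier;

// Stand-in sketch: each dequeue attempt moves to the next underlying reader,
// wrapping around at the end of the list.
final class RoundRobinSketch<T> {
    private final List<Supplier<T>> readers;
    private int next;

    RoundRobinSketch(List<Supplier<T>> readers) {
        this.readers = readers;
    }

    T dequeue() {
        Supplier<T> reader = readers.get(next);
        next = (next + 1) % readers.size();
        return reader.get();
    }
}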

Example 23 with ConsumerConfig

use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.

the class SaltedHBaseQueueStrategy method createScanner.

@Override
public QueueScanner createScanner(ConsumerConfig consumerConfig, HTable hTable, Scan scan, int numRows) throws IOException {
    // roughly divide by the number of buckets, but scan slightly more (1.1x) so we don't need another RPC if the estimate is a bit low
    ScanBuilder distributedScan = tableUtil.buildScan(scan);
    int caching = (int) (1.1 * numRows / distributorBuckets);
    distributedScan.setCaching(caching);
    ResultScanner scanner = DistributedScanner.create(hTable, distributedScan.build(), rowKeyDistributor, scansExecutor);
    return new HBaseQueueScanner(scanner, numRows, rowKeyConverter);
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) ScanBuilder(co.cask.cdap.data2.util.hbase.ScanBuilder)
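
The caching value trades a slight over-fetch (the 1.1 factor) for not needing a second RPC when the per-bucket estimate runs low. A quick worked example with illustrative numbers:

// With 1000 requested rows spread over 8 salt buckets:
int numRows = 1000;
int distributorBuckets = 8;
int caching = (int) (1.1 * numRows / distributorBuckets);  // 1100.0 / 8 = 137.5, truncated to 137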

Example 24 with ConsumerConfig

use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.

the class InMemoryQueue method dequeue.

public ImmutablePair<List<Key>, List<byte[]>> dequeue(Transaction tx, ConsumerConfig config, ConsumerState consumerState, int maxBatchSize) {
    List<Key> keys = Lists.newArrayListWithCapacity(maxBatchSize);
    List<byte[]> datas = Lists.newArrayListWithCapacity(maxBatchSize);
    NavigableSet<Key> keysToScan = consumerState.startKey == null ? entries.navigableKeySet() : entries.tailMap(consumerState.startKey).navigableKeySet();
    boolean updateStartKey = true;
    // navigableKeySet is immune to concurrent modification
    for (Key key : keysToScan) {
        if (keys.size() >= maxBatchSize) {
            break;
        }
        if (updateStartKey && key.txId < tx.getFirstShortInProgress()) {
            // See QueueEntryRow#canCommit for reason.
            consumerState.startKey = key;
        }
        if (tx.getReadPointer() < key.txId) {
            // the entry is newer than the current transaction, and so are all subsequent entries; bail out
            break;
        } else if (tx.isInProgress(key.txId)) {
            // the entry is in the exclude list of the current transaction, but visible entries may follow,
            // so we have to revisit this entry next time
            updateStartKey = false;
            continue;
        }
        Item item = entries.get(key);
        if (item == null) {
            // entry was deleted (evicted or undone) after we started iterating
            continue;
        }
        // check whether this is processed already
        ConsumerEntryState state = item.getConsumerState(config.getGroupId());
        if (ConsumerEntryState.PROCESSED.equals(state)) {
            // already processed but not yet evicted. move on
            continue;
        }
        if (config.getDequeueStrategy().equals(DequeueStrategy.FIFO)) {
            // for FIFO, attempt to claim the entry and return it
            if (item.claim(config)) {
                keys.add(key);
                datas.add(item.entry.getData());
            }
            // else: someone else claimed it, or it was already processed, move on, but we may have to revisit this.
            updateStartKey = false;
            continue;
        }
        // for hash/round robin, if group size is 1, just take it
        if (config.getGroupSize() == 1) {
            keys.add(key);
            datas.add(item.entry.getData());
            updateStartKey = false;
            continue;
        }
        // hash by entry hash key or entry id
        int hash;
        if (config.getDequeueStrategy().equals(DequeueStrategy.ROUND_ROBIN)) {
            hash = key.hashCode();
        } else {
            Integer hashFoundInEntry = item.entry.getHashKey(config.getHashKey());
            hash = hashFoundInEntry == null ? 0 : hashFoundInEntry;
        }
        // modulo of a negative is negative, make sure we're positive or 0.
        if (Math.abs(hash) % config.getGroupSize() == config.getInstanceId()) {
            keys.add(key);
            datas.add(item.entry.getData());
            updateStartKey = false;
        }
    }
    return keys.isEmpty() ? null : ImmutablePair.of(keys, datas);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConsumerEntryState(co.cask.cdap.data2.transaction.queue.ConsumerEntryState)
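
One detail worth noting in the hash/round-robin branch: in Java, Math.abs(Integer.MIN_VALUE) is still negative, so the abs-then-modulo idiom has a rare edge case. A defensive variant of the same partition-claim test (a sketch, not what the snippet above does) uses Math.floorMod, which is always non-negative for a positive group size:

// Sketch: same claim test as above, but safe even for Integer.MIN_VALUE.
static boolean claimsEntry(int hash, int groupSize, int instanceId) {
    return Math.floorMod(hash, groupSize) == instanceId;
}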

Example 25 with ConsumerConfig

use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.

the class AbstractStreamFileConsumer method createBaseReadFilter.

private ReadFilter createBaseReadFilter(final ConsumerConfig consumerConfig) {
    final int groupSize = consumerConfig.getGroupSize();
    final DequeueStrategy strategy = consumerConfig.getDequeueStrategy();
    if (groupSize == 1 || strategy == DequeueStrategy.FIFO) {
        return ReadFilter.ALWAYS_ACCEPT;
    }
    // For RoundRobin and Hash partitioning, the claim is done by matching the hash value to the instance id.
    // For Hash, to preserve existing behavior, everything routes to instance 0.
    // For RoundRobin, the idea is to scatter the events across consumers evenly. Since there is no way to know
    // the absolute starting point needed for true round robin, we apply a good-enough hash function to the
    // file offset as a way to spread events across consumers.
    final int instanceId = consumerConfig.getInstanceId();
    return new ReadFilter() {

        @Override
        public boolean acceptOffset(long offset) {
            int hashValue = Math.abs(strategy == DequeueStrategy.HASH ? 0 : ROUND_ROBIN_HASHER.hashLong(offset).hashCode());
            return instanceId == (hashValue % groupSize);
        }
    };
}
Also used : DequeueStrategy(co.cask.cdap.data2.queue.DequeueStrategy) ReadFilter(co.cask.cdap.data.file.ReadFilter)
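
To see how the offset-based filter scatters events, here is a small standalone sketch; Long.hashCode is a stand-in for the Guava ROUND_ROBIN_HASHER used above:

// With groupSize = 3, an instance accepts an offset only when the hash value
// lands on its instance id, so each consumer sees roughly a third of the events.
int groupSize = 3;
for (long offset = 0; offset < 6; offset++) {
    int instance = Math.abs(Long.hashCode(offset)) % groupSize;
    System.out.println("offset " + offset + " -> accepted by instance " + instance);
}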

Aggregations

ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig) 33
QueueConsumer (co.cask.cdap.data2.queue.QueueConsumer) 17
TransactionContext (org.apache.tephra.TransactionContext) 14
Test (org.junit.Test) 14
QueueName (co.cask.cdap.common.queue.QueueName) 12
ConsumerGroupConfig (co.cask.cdap.data2.queue.ConsumerGroupConfig) 12
QueueEntry (co.cask.cdap.data2.queue.QueueEntry) 8
Transaction (org.apache.tephra.Transaction) 8
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent) 7
QueueProducer (co.cask.cdap.data2.queue.QueueProducer) 7
QueueEntryRow (co.cask.cdap.data2.transaction.queue.QueueEntryRow) 7
StreamId (co.cask.cdap.proto.id.StreamId) 7
Cell (org.apache.hadoop.hbase.Cell) 7
Filter (org.apache.hadoop.hbase.filter.Filter) 7
FilterList (org.apache.hadoop.hbase.filter.FilterList) 7
IOException (java.io.IOException) 6
TransactionFailureException (org.apache.tephra.TransactionFailureException) 6
TransactionExecutor (org.apache.tephra.TransactionExecutor) 5
DequeueResult (co.cask.cdap.data2.queue.DequeueResult) 4
DequeueStrategy (co.cask.cdap.data2.queue.DequeueStrategy) 3