use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.
the class LineageWriterDataFabricFacade method createConsumer.
@Override
public QueueConsumer createConsumer(QueueName queueName, ConsumerConfig consumerConfig, int numGroups) throws IOException {
  QueueConsumer consumer = queueClientFactory.createConsumer(queueName, consumerConfig, numGroups);
  if (consumer instanceof TransactionAware) {
    consumer = new CloseableQueueConsumer(datasetCache, consumer);
    datasetCache.addExtraTransactionAware((TransactionAware) consumer);
  }
  return consumer;
}
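For context, the ConsumerConfig passed in identifies the consumer group and the instance within it. Below is a minimal sketch of a caller, assuming ConsumerConfig exposes a (groupId, instanceId, groupSize, strategy, hashKey) constructor and that queueName has already been resolved; the concrete values are illustrative only.
ConsumerConfig consumerConfig = new ConsumerConfig(0L,                   // groupId (illustrative)
                                                   0,                    // instanceId
                                                   1,                    // groupSize
                                                   DequeueStrategy.FIFO, // dequeue strategy
                                                   null);                // hashKey (unused for FIFO)
QueueConsumer consumer = dataFabricFacade.createConsumer(queueName, consumerConfig, 1);
// When the underlying consumer is TransactionAware, the facade returns a CloseableQueueConsumer
// that is also registered with the dataset cache, so it participates in the enclosing transaction.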
use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.
the class FlowletProgramRunner method processSpecificationFactory.
private ProcessSpecificationFactory processSpecificationFactory(final BasicFlowletContext flowletContext, final DataFabricFacade dataFabricFacade,
                                                                final QueueReaderFactory queueReaderFactory, final String flowletName,
                                                                final Table<Node, String, Set<QueueSpecification>> queueSpecs,
                                                                final ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder,
                                                                final SchemaCache schemaCache) {
  return new ProcessSpecificationFactory() {
    @Override
    public <T> ProcessSpecification create(Set<String> inputNames, Schema schema, TypeToken<T> dataType, ProcessMethod<T> method,
                                           ConsumerConfig consumerConfig, int batchSize, Tick tickAnnotation) throws Exception {
      List<QueueReader<T>> queueReaders = Lists.newLinkedList();
      for (Map.Entry<Node, Set<QueueSpecification>> entry : queueSpecs.column(flowletName).entrySet()) {
        for (QueueSpecification queueSpec : entry.getValue()) {
          final QueueName queueName = queueSpec.getQueueName();
          if (queueSpec.getInputSchema().equals(schema)
              && (inputNames.contains(queueName.getSimpleName()) || inputNames.contains(FlowletDefinition.ANY_INPUT))) {
            Node sourceNode = entry.getKey();
            if (sourceNode.getType() == FlowletConnection.Type.STREAM) {
              ConsumerSupplier<StreamConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry,
                                                                                          dataFabricFacade, queueName, consumerConfig);
              queueConsumerSupplierBuilder.add(consumerSupplier);
              // No decoding is needed, as a process method can only have StreamEvent as its type when consuming a stream
              Function<StreamEvent, T> decoder = wrapInputDecoder(flowletContext, null, queueName, new Function<StreamEvent, T>() {
                @Override
                @SuppressWarnings("unchecked")
                public T apply(StreamEvent input) {
                  return (T) input;
                }
              });
              queueReaders.add(queueReaderFactory.createStreamReader(queueName.toStreamId(), consumerSupplier, batchSize, decoder));
            } else {
              int numGroups = getNumGroups(Iterables.concat(queueSpecs.row(entry.getKey()).values()), queueName);
              Function<ByteBuffer, T> decoder = wrapInputDecoder(flowletContext,
                                                                 entry.getKey().getName(), // the producer flowlet
                                                                 queueName,
                                                                 createInputDatumDecoder(dataType, schema, schemaCache));
              ConsumerSupplier<QueueConsumer> consumerSupplier = ConsumerSupplier.create(flowletContext.getOwners(), runtimeUsageRegistry,
                                                                                         dataFabricFacade, queueName, consumerConfig, numGroups);
              queueConsumerSupplierBuilder.add(consumerSupplier);
              queueReaders.add(queueReaderFactory.createQueueReader(consumerSupplier, batchSize, decoder));
            }
          }
        }
      }
      // If inputs are required but there is no available input queue, return null
      if (!inputNames.isEmpty() && queueReaders.isEmpty()) {
        return null;
      }
      return new ProcessSpecification<>(new RoundRobinQueueReader<>(queueReaders), method, tickAnnotation);
    }
  };
}
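The ConsumerConfig handed to create() is what partitions queue entries across the running flowlet instances: groupId identifies the consuming flowlet, groupSize is its instance count, and instanceId is the index of the current instance. Below is a minimal sketch of how such per-instance configs could be assembled, assuming the (groupId, instanceId, groupSize, strategy, hashKey) constructor; the helper is hypothetical and not part of FlowletProgramRunner.
// Hypothetical helper: build one ConsumerConfig per instance of the same consumer group.
List<ConsumerConfig> configsForGroup(long groupId, int instanceCount, DequeueStrategy strategy, @Nullable String hashKey) {
  List<ConsumerConfig> configs = new ArrayList<>(instanceCount);
  for (int instanceId = 0; instanceId < instanceCount; instanceId++) {
    configs.add(new ConsumerConfig(groupId, instanceId, instanceCount, strategy, hashKey));
  }
  return configs;
}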
use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.
the class SaltedHBaseQueueStrategy method createScanner.
@Override
public QueueScanner createScanner(ConsumerConfig consumerConfig, HTable hTable, Scan scan, int numRows) throws IOException {
  // Roughly divide the caching by the number of buckets, padded a little so that being slightly off
  // does not cost another RPC
  ScanBuilder distributedScan = tableUtil.buildScan(scan);
  int caching = (int) (1.1 * numRows / distributorBuckets);
  distributedScan.setCaching(caching);
  ResultScanner scanner = DistributedScanner.create(hTable, distributedScan.build(), rowKeyDistributor, scansExecutor);
  return new HBaseQueueScanner(scanner, numRows, rowKeyConverter);
}
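A worked example of the caching heuristic, with illustrative values:
int numRows = 100;
int distributorBuckets = 16;
// ~numRows / buckets with a 10% cushion: (int) (1.1 * 100 / 16) = 6 rows fetched per scanner RPC
int caching = (int) (1.1 * numRows / distributorBuckets);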
use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.
the class InMemoryQueue method dequeue.
public ImmutablePair<List<Key>, List<byte[]>> dequeue(Transaction tx, ConsumerConfig config, ConsumerState consumerState, int maxBatchSize) {
  List<Key> keys = Lists.newArrayListWithCapacity(maxBatchSize);
  List<byte[]> datas = Lists.newArrayListWithCapacity(maxBatchSize);
  NavigableSet<Key> keysToScan = consumerState.startKey == null
    ? entries.navigableKeySet()
    : entries.tailMap(consumerState.startKey).navigableKeySet();
  boolean updateStartKey = true;
  // navigableKeySet is immune to concurrent modification
  for (Key key : keysToScan) {
    if (keys.size() >= maxBatchSize) {
      break;
    }
    if (updateStartKey && key.txId < tx.getFirstShortInProgress()) {
      // See QueueEntryRow#canCommit for the reason
      consumerState.startKey = key;
    }
    if (tx.getReadPointer() < key.txId) {
      // the entry is newer than the current transaction, and so are all subsequent entries. bail out.
      break;
    } else if (tx.isInProgress(key.txId)) {
      // the entry is in the exclude list of the current transaction. There is a chance that visible entries follow,
      // so next time we have to revisit this entry
      updateStartKey = false;
      continue;
    }
    Item item = entries.get(key);
    if (item == null) {
      // entry was deleted (evicted or undone) after we started iterating
      continue;
    }
    // check whether this is processed already
    ConsumerEntryState state = item.getConsumerState(config.getGroupId());
    if (ConsumerEntryState.PROCESSED.equals(state)) {
      // already processed but not yet evicted. move on
      continue;
    }
    if (config.getDequeueStrategy().equals(DequeueStrategy.FIFO)) {
      // for FIFO, attempt to claim the entry and return it
      if (item.claim(config)) {
        keys.add(key);
        datas.add(item.entry.getData());
      }
      // else: someone else claimed it, or it was already processed; move on, but we may have to revisit this
      updateStartKey = false;
      continue;
    }
    // for hash/round robin, if the group size is 1, just take it
    if (config.getGroupSize() == 1) {
      keys.add(key);
      datas.add(item.entry.getData());
      updateStartKey = false;
      continue;
    }
    // hash by entry hash key or entry id
    int hash;
    if (config.getDequeueStrategy().equals(DequeueStrategy.ROUND_ROBIN)) {
      hash = key.hashCode();
    } else {
      Integer hashFoundInEntry = item.entry.getHashKey(config.getHashKey());
      hash = hashFoundInEntry == null ? 0 : hashFoundInEntry;
    }
    // modulo of a negative is negative, make sure we're positive or 0
    if (Math.abs(hash) % config.getGroupSize() == config.getInstanceId()) {
      keys.add(key);
      datas.add(item.entry.getData());
      updateStartKey = false;
    }
  }
  return keys.isEmpty() ? null : ImmutablePair.of(keys, datas);
}
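The hash and round-robin branch above reduces to a single ownership test: an instance takes an entry only when the entry's hash, modulo the group size, equals its instance id. A minimal sketch of that check in isolation; the helper name is illustrative.
// Mirrors the claim logic above: under ROUND_ROBIN the hash is the entry key's hashCode(),
// under HASH it is the value stored under config.getHashKey() (0 if absent).
static boolean ownedByInstance(int hash, ConsumerConfig config) {
  // modulo of a negative value is negative, hence Math.abs
  return Math.abs(hash) % config.getGroupSize() == config.getInstanceId();
}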
use of co.cask.cdap.data2.queue.ConsumerConfig in project cdap by caskdata.
the class AbstractStreamFileConsumer method createBaseReadFilter.
private ReadFilter createBaseReadFilter(final ConsumerConfig consumerConfig) {
  final int groupSize = consumerConfig.getGroupSize();
  final DequeueStrategy strategy = consumerConfig.getDequeueStrategy();
  if (groupSize == 1 || strategy == DequeueStrategy.FIFO) {
    return ReadFilter.ALWAYS_ACCEPT;
  }
  // For RoundRobin and Hash partitioning, the claim is done by matching the hash code to the instance id.
  // For Hash, to preserve existing behavior, everything routes to instance 0.
  // For RoundRobin, the idea is to scatter the events across consumers evenly. Since there is no way to know
  // the absolute starting point for true round robin, we apply a good-enough hash function to the
  // file offset as a way to spread events across consumers.
  final int instanceId = consumerConfig.getInstanceId();
  return new ReadFilter() {
    @Override
    public boolean acceptOffset(long offset) {
      int hashValue = Math.abs(strategy == DequeueStrategy.HASH ? 0 : ROUND_ROBIN_HASHER.hashLong(offset).hashCode());
      return instanceId == (hashValue % groupSize);
    }
  };
}
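The net effect is that each stream file offset is accepted by exactly one of the groupSize round-robin consumers. Below is a small sketch of that partitioning, assuming ROUND_ROBIN_HASHER is a Guava HashFunction such as Hashing.murmur3_32(); the concrete hasher and values are assumptions, not taken from the class above.
com.google.common.hash.HashFunction hasher = com.google.common.hash.Hashing.murmur3_32(); // stand-in for ROUND_ROBIN_HASHER
int groupSize = 3;
for (long offset = 0; offset < 5; offset++) {
  int hashValue = Math.abs(hasher.hashLong(offset).hashCode());
  // exactly one instanceId in [0, groupSize) matches, so each offset is read by a single consumer instance
  System.out.println("offset " + offset + " -> instance " + (hashValue % groupSize));
}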